var = VariableClass()


-def process_frame(frame, project, video_out='', frames_out=''):
+def process_frame(frame, project, cv2=None, frames_out=''):
    # Perform object classification on the frame.
    # persist=True -> The tracking results are stored in the model.
    # persist should be kept True, as this provides unique IDs for each detection.
@@ -32,7 +32,7 @@ def process_frame(frame, project, video_out='', frames_out=''):
        total_time_class_prediction += time.time() - start_time_class_prediction

        if len(cur_results[0]) == 0:
-            return frame, total_time_class_prediction, False, labels_and_boxes
+            return None, labels_and_boxes, None, total_time_class_prediction, False

        total_results.append(cur_results[0])
@@ -49,15 +49,16 @@ def process_frame(frame, project, video_out='', frames_out=''):
    # Since we have over 1k videos per day, the dataset we collect needs to be high-quality
    # A valid image needs to:
    # + Have at least MIN_DETECTIONS objects detected:
-    # + Have to have helmet (since we are lacking of helmet dataset)
+    # + Satisfy project.condition_func, which defines the custom condition logic for each specific project.
    if project.condition_func(total_results):
        for index, results in enumerate(total_results):
            # As a convention we will store all result labels under model1's;
            # the other models' labels will be mapped accordingly
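            # Each stored entry is a (xywhn, xyxy, cls, conf) tuple; the index-based
            # accesses below (class at [2], confidence at [3]) rely on this ordering.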
            if not combined_results:
-                combined_results += [(box.xywhn, box.cls, box.conf) for box in results.boxes]
+                combined_results += [(box.xywhn, box.xyxy, box.cls, box.conf) for box in results.boxes]
            else:
-                combined_results += [(box.xywhn, project.map_to_first_model(index, box.cls), box.conf) for box in results.boxes]
+                combined_results += [(box.xywhn, box.xyxy, project.map_to_first_model(index, box.cls), box.conf)
+                                     for box in results.boxes]

        # sort results based on descending confidences
-        sorted_combined_results = sorted(combined_results, key=lambda x: x[2], reverse=True)
+        # conf moved from index 2 to index 3 after box.xyxy was added to the tuples
+        sorted_combined_results = sorted(combined_results, key=lambda x: x[3], reverse=True)
@@ -68,7 +69,7 @@ def process_frame(frame, project, video_out='', frames_out=''):
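        # Deduplicate: among boxes of the same class whose normalized centers differ
        # by less than 0.01, only the first (highest-confidence) one is kept.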
        for element in sorted_combined_results:
            add_flag = True
            for res in combined_results:
-                if res[1] == element[1]:
+                if res[2] == element[2]:  # classes comparison
                    if (abs(res[0][0][0] - element[0][0][0]) < 0.01
                            and (abs(res[0][0][1] - element[0][0][1]) < 0.01)):
                        add_flag = False
@@ -78,8 +79,103 @@ def process_frame(frame, project, video_out='', frames_out=''):
        # If the combined result has at least MIN_DETECTIONS boxes found (could belong to either class)
        if len(combined_results) >= var.MIN_DETECTIONS:
            print("Condition met, gathering the labels and boxes to return the results")
-            for xywhn, cls, _ in combined_results:
-                labels_and_boxes += f'{int(cls)} {xywhn[0, 0].item()} {xywhn[0, 1].item()} {xywhn[0, 2].item()} {xywhn[0, 3].item()}\n'
-            return frame, total_time_class_prediction, True, labels_and_boxes
+            # Crop the frame to keep only the area of interest and reduce storage waste
+            cropped_frame, cropped_coordinate = __crop_frame__(frame, combined_results)

-    return frame, total_time_class_prediction, False, labels_and_boxes
+            # <For testing> to check whether the labels are transformed and applied
+            # correctly to the cropped frame, uncomment the line below
+            labeled_frame = None
+            # labeled_frame = __get_labeled_frame__(cropped_frame, cropped_coordinate, cv2, combined_results)
+
+            # Transform the labels and boxes accordingly
+            labels_and_boxes = __transform_labels__(cropped_frame, cropped_coordinate, combined_results)
+            total_time_class_prediction += time.time() - start_time_class_prediction
+            return cropped_frame, labels_and_boxes, labeled_frame, total_time_class_prediction, True
+
+    return None, labels_and_boxes, None, total_time_class_prediction, False
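+
+# A minimal caller sketch (hypothetical variable names, illustration only) for the new
+# return contract (cropped_frame, labels_and_boxes, labeled_frame, elapsed, success):
+#     frame_out, labels, _, elapsed, ok = process_frame(frame, project, cv2=cv2)
+#     if ok:
+#         cv2.imwrite('sample.jpg', frame_out)  # assumes OpenCV; path is illustrative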
+
+
+def __crop_frame__(frame, combined_results, padding=100):
+    """
+    Crop the frame to keep only the area of interest, removing background that contains no detections.
+
+    Args:
+        frame: The original frame to be processed.
+        combined_results: List of results detected by the models.
+        padding: Extra pixels added around the cropped frame to avoid object cutoff.
+    """
+    # If the combined result has at least MIN_DETECTIONS boxes found
+    if len(combined_results) >= var.MIN_DETECTIONS:
+        # Initialize bounding box limits
+        x1_min, y1_min, x2_max, y2_max = float('inf'), float('inf'), float('-inf'), float('-inf')
+
+        # Grow the union box until it encloses every detection
+        for _, xyxy, _, _ in combined_results:
+            x1, y1, x2, y2 = xyxy[0]
+            x1_min, y1_min = min(x1_min, x1), min(y1_min, y1)
+            x2_max, y2_max = max(x2_max, x2), max(y2_max, y2)
+
+        # Apply padding to the bounding box, clamped to the frame borders
+        orig_height, orig_width = frame.shape[:2]
+        x1_min = int(max(0, x1_min - padding))
+        y1_min = int(max(0, y1_min - padding))
+        x2_max = int(min(orig_width, x2_max + padding))
+        y2_max = int(min(orig_height, y2_max + padding))
+
+        # Crop the frame to the union bounding box with padding
+        cropped_frame = frame[y1_min:y2_max, x1_min:x2_max]
+
+        return cropped_frame, (x1_min, y1_min, x2_max, y2_max)
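+
+    # With fewer than MIN_DETECTIONS boxes, the function falls through and implicitly
+    # returns None; process_frame only calls it after checking the detection count.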
+
+
+def __transform_labels__(cropped_frame, cropped_coordinate, combined_results):
+    """
+    Transform the label and box coordinates to match the cropped frame.
+
+    Args:
+        cropped_frame: The cropped frame the labels are transformed to.
+        cropped_coordinate: Coordinates of the crop within the original frame (in xyxy format).
+        combined_results: List of results detected by the models.
+    """
+    labels_and_boxes = ''
+    # ndarray shape is (height, width, channels), so unpack height first
+    frame_height, frame_width = cropped_frame.shape[:2]
+
+    for _, xyxy, cls, conf in combined_results:
+        x1, y1, x2, y2 = xyxy[0]
+        # Shift the box into the cropped frame's coordinate system
+        x1, y1, x2, y2 = (int(abs(x1 - cropped_coordinate[0])), int(abs(y1 - cropped_coordinate[1])),
+                          int(abs(x2 - cropped_coordinate[0])), int(abs(y2 - cropped_coordinate[1])))
+
+        x_center = (x1 + x2) / 2
+        y_center = (y1 + y2) / 2
+
+        # Calculate the xywhn values (required by the ultralytics YOLO dataset format)
+        x_center_norm = x_center / frame_width
+        y_center_norm = y_center / frame_height
+        width_norm = (x2 - x1) / frame_width
+        height_norm = (y2 - y1) / frame_height
+
+        labels_and_boxes += f'{int(cls)} {x_center_norm} {y_center_norm} {width_norm} {height_norm}\n'
+
+    return labels_and_boxes
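+
+# Worked example (illustrative numbers): a crop-relative box (x1, y1, x2, y2) = (50, 40, 150, 120)
+# in a 200x100 (width x height) crop gives x_center = 100 -> 100/200 = 0.5, y_center = 80 -> 80/100 = 0.8,
+# width = 100/200 = 0.5, height = 80/100 = 0.8, i.e. the label line "<cls> 0.5 0.8 0.5 0.8".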
+
+
+def __get_labeled_frame__(cropped_frame, cropped_coordinate, cv2, combined_results):
+    """
+    <Used for testing if you want to see the labeled frame>
+    Return the cropped frame with the transformed labels drawn on it.
+
+    Args:
+        cropped_frame: The cropped frame to draw the labels on.
+        cropped_coordinate: Coordinates of the crop within the original frame (in xyxy format).
+        cv2: The OpenCV module, passed in by the caller.
+        combined_results: List of results detected by the models.
+    """
+    labeled_frame = cropped_frame.copy()
+    for _, xyxy, cls, _ in combined_results:
+        x1, y1, x2, y2 = xyxy[0]
+        # Shift the box into the cropped frame's coordinate system
+        x1, y1, x2, y2 = (int(abs(x1 - cropped_coordinate[0])), int(abs(y1 - cropped_coordinate[1])),
+                          int(abs(x2 - cropped_coordinate[0])), int(abs(y2 - cropped_coordinate[1])))
+        print(f"Box: {xyxy}, Class: {int(cls)}")
+        print(f"Width: {x2 - x1} and height: {y2 - y1}")
+        cv2.rectangle(labeled_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        cv2.putText(labeled_frame, f'{int(cls)}', (x1 - 10, y1 - 20),
+                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
+
+    return labeled_frame
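+
+# Testing sketch (assumes OpenCV is available; names are illustrative):
+#     import cv2
+#     debug = __get_labeled_frame__(cropped_frame, cropped_coordinate, cv2, combined_results)
+#     cv2.imwrite('debug.jpg', debug)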