Skip to content

Commit 9a3cf2a

Browse files
committed
diffbin utils function and testfile
1 parent de3aaae commit 9a3cf2a

File tree

3 files changed

+307
-216
lines changed

3 files changed

+307
-216
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,52 @@
11
import keras
2-
import numpy as np
3-
import tensorflow as tf
42

53

4+
class Point:
    """A 2-D point (or vector) with a few arithmetic helpers.

    The coordinates are stored exactly as given; no type conversion or
    validation is applied.
    """

    def __init__(self, x, y):
        # Horizontal and vertical coordinates, kept as passed in.
        self.x = x
        self.y = y

    def __repr__(self):
        """Return a debug-friendly representation such as ``Point(x=1, y=2)``."""
        return f"Point(x={self.x!r}, y={self.y!r})"

    def __add__(self, other):
        """Return a new ``Point`` that is the component-wise sum."""
        return Point(self.x + other.x, self.y + other.y)

    def __sub__(self, other):
        """Return a new ``Point`` that is the component-wise difference."""
        return Point(self.x - other.x, self.y - other.y)

    def __neg__(self):
        """Return a new ``Point`` with both coordinates negated."""
        return Point(-self.x, -self.y)

    def cross(self, other):
        """Return the scalar 2-D cross product ``x1 * y2 - y1 * x2``."""
        return self.x * other.y - self.y * other.x

    def to_tuple(self):
        """Return the coordinates as an ``(x, y)`` tuple."""
        return (self.x, self.y)
23+
24+
25+
def shrink_polygan(polygon, offset):
    """
    Shrink a polygon by pulling every vertex toward the vertex centroid.

    Each vertex is moved along the straight line to the centroid (the mean
    of all vertices) by ``offset`` units. A vertex that lies closer than
    ``offset`` to the centroid is placed on the centroid itself. A negative
    ``offset`` moves the vertices away from the centroid instead.

    Args:
        polygon: Sequence of ``Point`` objects, or of indexable ``(x, y)``
            pairs which are converted to ``Point`` objects.
        offset: Distance each vertex is moved toward the centroid.

    Returns:
        A new list of ``Point`` objects. When ``polygon`` has fewer than
        three vertices it is returned as-is (same object, not converted).
    """
    if len(polygon) < 3:
        return polygon

    # Only the first element is inspected to decide whether to convert.
    if not isinstance(polygon[0], Point):
        polygon = [Point(p[0], p[1]) for p in polygon]

    count = len(polygon)
    cx = sum(p.x for p in polygon) / count
    cy = sum(p.y for p in polygon) / count

    shrunk = []
    for p in polygon:
        dx = p.x - cx
        dy = p.y - cy
        # Keep the distance away from zero so a vertex sitting exactly on
        # the centroid does not trigger a division by zero below.
        norm = max((dx**2 + dy**2) ** 0.5, 1e-6)
        # Clamp at zero: a vertex never travels past the centroid.
        shrink_ratio = max(0, 1 - offset / norm)
        shrunk.append(Point(cx + dx * shrink_ratio, cy + dy * shrink_ratio))

    return shrunk
47+
48+
49+
# Polygon Area
650
def Polygon(coords):
751
"""
852
Calculate the area of a polygon using the Shoelace formula.
@@ -18,18 +62,7 @@ def Polygon(coords):
1862
return area
1963

2064

21-
def shrink_polygan(poly, factor):
22-
"""
23-
Shrink polygan inwards by a scaling its coordinated towards centroid
24-
"""
25-
poly = keras.ops.convert_to_tensor(poly, dtype="float32")
26-
centroid = keras.ops.mean(poly, axis=0) # Compute centroid
27-
28-
# Correct scaling towards centroid
29-
shrinked_poly = centroid + (poly - centroid) * factor
30-
return shrinked_poly
31-
32-
65+
# binary search smallest width
3366
def binary_search_smallest_width(poly):
3467
"""
3568
The function aims maximum amount by which polygan can be shrunk by
@@ -38,14 +71,14 @@ def binary_search_smallest_width(poly):
3871
if len(poly) < 3:
3972
return 0
4073

41-
low, high = (
42-
0,
43-
1,
44-
) # Scale factor (1 = original size, 0 = collapsed to centroid)
74+
low, high = 0, 1
4575

46-
while high - low > 0.01: # Precision threshold
76+
while high - low > 0.01:
4777
mid = (high + low) / 2
4878
mid_poly = shrink_polygan(poly, mid)
79+
mid_poly = keras.ops.cast(
80+
keras.ops.stack([[p.x, p.y] for p in mid_poly]), dtype="float32"
81+
)
4982
area = Polygon(mid_poly)
5083

5184
if area > 0.1:
@@ -58,6 +91,7 @@ def binary_search_smallest_width(poly):
5891
return int(height) if height >= 0.1 else 0
5992

6093

94+
# project point to line
6195
def project_point_to_line(x, u, v, axis=0):
6296
"""
6397
Projects a point x onto the line defined by points u and v
@@ -68,12 +102,13 @@ def project_point_to_line(x, u, v, axis=0):
68102

69103
n = v - u
70104
n = n / (
71-
keras.ops.norm(n, axis=axis, keepdims=True) + np.finfo(np.float32).eps
105+
keras.ops.norm(n, axis=axis, keepdims=True) + keras.backend.epsilon()
72106
)
73107
p = u + n * keras.ops.sum((x - u) * n, axis=axis, keepdims=True)
74108
return p
75109

76110

111+
# project_point_to_segment
77112
def project_point_to_segment(x, u, v, axis=0):
78113
"""
79114
Projects a point x onto the line segment defined by points u and v
@@ -90,30 +125,17 @@ def project_point_to_segment(x, u, v, axis=0):
90125
return o
91126

92127

128+
# get line height
93129
def get_line_height(poly):
    """
    Get the height of the line defined by the polygon.

    Thin wrapper: the value is whatever ``binary_search_smallest_width``
    returns for ``poly``.
    """
    return binary_search_smallest_width(poly)
98131

99132

100-
def line_segment_intersection(x, y, polygon):
133+
# cv2.fillPoly equivalent implemented with keras.ops
134+
def fill_poly_keras(vertices, image_shape):
101135
"""
136+
Fill a polygon using the cv2.fillPoly function with keras.ops.
102137
Ray-casting algorithm to determine if a point is inside a polygon.
103-
https://medium.com/@girishajmera/exploring-algorithms-to-determine-points-inside-or-outside-a-polygon-038952946f87
104138
"""
105-
inside = False
106-
num_vertices = len(polygon)
107-
for i in range(num_vertices):
108-
x1, y1 = polygon[i]
109-
x2, y2 = polygon[(i + 1) % num_vertices]
110-
if (y1 > y) != (y2 > y) and x < x1 + (y - y1) * (x2 - x1) / (y2 - y1):
111-
inside = not inside
112-
return inside
113-
114-
115-
def fill_poly(vertices, image_shape):
116-
"""Fills a polygon using ray casting."""
117139
height, width = image_shape
118140
x = keras.ops.arange(width)
119141
y = keras.ops.arange(height)
@@ -122,22 +144,29 @@ def fill_poly(vertices, image_shape):
122144
yy = keras.ops.cast(yy, "float32")
123145

124146
result = keras.ops.zeros((height, width), dtype="float32")
125-
xx_flat = keras.ops.reshape(xx, (-1,))
126-
yy_flat = keras.ops.reshape(yy, (-1,))
127-
128-
inside_mask = [
129-
line_segment_intersection(xx_flat[i], yy_flat[i], vertices)
130-
for i in range(xx_flat.shape[0])
131-
]
132-
inside_mask_tensor = keras.ops.reshape(
133-
keras.ops.convert_to_tensor(inside_mask, dtype="bool"), (height, width)
134-
)
135-
result = keras.ops.where(
136-
inside_mask_tensor, keras.ops.ones_like(result), result
137-
)
147+
148+
vertices = keras.ops.convert_to_tensor(vertices, dtype="float32")
149+
num_vertices = vertices.shape[0]
150+
151+
for i in range(num_vertices):
152+
x1, y1 = vertices[i]
153+
x2, y2 = vertices[(i + 1) % num_vertices]
154+
155+
# Modified conditions to potentially include more boundary pixels
156+
cond1 = (yy > keras.ops.minimum(y1, y2)) & (
157+
yy <= keras.ops.maximum(y1, y2)
158+
)
159+
cond2 = xx < (x1 + (yy - y1) * (x2 - x1) / (y2 - y1))
160+
161+
result = keras.ops.where(
162+
cond1 & cond2 & ((y1 > yy) != (y2 > yy)), 1 - result, result
163+
)
164+
165+
result = keras.ops.cast(result, "int32")
138166
return result
139167

140168

169+
# get mask
141170
def get_mask(w, h, polys, ignores):
142171
"""
143172
Generates a binary mask where:
@@ -147,67 +176,30 @@ def get_mask(w, h, polys, ignores):
147176
mask = keras.ops.ones((h, w), dtype="float32")
148177

149178
for poly, ignore in zip(polys, ignores):
150-
poly = np.array(poly, np.int32)
179+
poly = keras.ops.cast(keras.ops.convert_to_numpy(poly), dtype="int32")
151180

152181
if poly.shape[0] < 3:
153182
print("Skipping invalid polygon:", poly)
154183
continue
155184

156185
fill_value = 0.0 if ignore else 1.0
157-
poly_mask = fill_poly(poly, (h, w))
186+
poly_mask = fill_poly_keras(poly, (h, w))
158187

159188
if ignore:
160189
mask = keras.ops.where(
161-
poly_mask == 1.0, keras.ops.zeros_like(mask), mask
190+
keras.ops.cast(poly_mask, "float32") == 1.0,
191+
keras.ops.zeros_like(mask),
192+
mask,
162193
)
163194
else:
164195
mask = keras.ops.maximum(mask, poly_mask)
165196
return mask
166197

167198

168-
def get_region_coordinate(w, h, polys, heights, shrink):
169-
"""
170-
Extract coordinates of regions corresponding to text lines in image using keras.ops.
171-
"""
172-
label_map = keras.ops.zeros((h, w), dtype="int32")
173-
174-
for line_id, (poly, height) in enumerate(zip(polys, heights)):
175-
if height > 0:
176-
shrinked_poly = shrink_polygan(poly, 1 - height * shrink)
177-
mask = fill_poly(shrinked_poly, (h, w))
178-
label_map = keras.ops.where(
179-
mask > 0,
180-
(line_id + 1) * keras.ops.ones_like(label_map),
181-
label_map,
182-
)
183-
184-
indices = keras.ops.convert_to_tensor(keras.ops.where(label_map > 0))
185-
if keras.ops.shape(indices)[0] == 0:
186-
return [np.zeros((0, 2), "int32")]
187-
188-
label_map_flat = keras.ops.reshape(label_map, (-1,))
189-
flattened_indices = indices[..., 0] * w + indices[..., 1]
190-
region_labels = keras.ops.take(label_map_flat, flattened_indices)
191-
unique_labels, _ = tf.unique(region_labels)
192-
unique_labels = keras.ops.convert_to_tensor(unique_labels)
193-
194-
regions_coords = []
195-
196-
for label in unique_labels:
197-
region_idx = keras.ops.where(label_map == label)
198-
region_idx = keras.ops.convert_to_tensor(region_idx)
199-
200-
coords = keras.ops.stack(
201-
[region_idx[..., 1], region_idx[..., 0]], axis=-1
202-
)
203-
regions_coords.append(coords)
204-
205-
return regions_coords
206-
207-
199+
# get polygon coordinates projection
208200
def get_coords_poly_projection(coords, poly):
209201
"""
210-
This projects set of points onto edges of a polygan and return closest
202+
This projects set of points onto edges of a polygan and return closest
211203
projected points
212204
"""
213205
start_points = keras.ops.array(poly, dtype="float32")
@@ -241,14 +233,16 @@ def get_coords_poly_projection(coords, poly):
241233
return best_projected_points
242234

243235

244-
def get_coords_poly_distance_keras(coords, poly):
236+
# get polygon coordinates distance
237+
def get_coords_poly_distance(coords, poly):
    """
    Calculate the distance between a set of points and a polygon.

    Every point in ``coords`` is projected with
    ``get_coords_poly_projection`` and the norm of the difference between
    the projection and the point is taken along axis 1.

    Args:
        coords: Points to measure (presumably shaped ``(N, 2)`` — confirm
            against callers).
        poly: Polygon passed through to ``get_coords_poly_projection``.

    Returns:
        Tensor of distances, reduced over axis 1.
    """
    projection = get_coords_poly_projection(coords, poly)
    return keras.ops.linalg.norm(projection - coords, axis=1)
250243

251244

245+
# get normalized weight
252246
def get_normalized_weight(heatmap, mask, background_weight=3.0):
253247
"""
254248
This function calculates normalized weight of heatmap
@@ -269,7 +263,50 @@ def get_normalized_weight(heatmap, mask, background_weight=3.0):
269263
wpos = (keras.ops.cast(nneg, dtype="float32") + smooth) / (
270264
keras.ops.cast(npos, dtype="float32") + smooth
271265
)
272-
weight = np.zeros_like(heatmap)
273-
weight[keras.ops.cast(neg, dtype="bool")] = background_weight
274-
weight[keras.ops.cast(pos, dtype="bool")] = wpos
266+
weight = keras.ops.zeros_like(heatmap)
267+
# weight[keras.ops.cast(neg, dtype="bool")] = background_weight
268+
neg = keras.ops.cast(neg, "bool")
269+
weight = keras.ops.where(neg, background_weight, weight)
270+
pos = keras.ops.cast(pos, "bool")
271+
weight = keras.ops.where(pos, wpos, weight)
275272
return weight
273+
274+
275+
# Getting region coordinates
276+
def get_region_coordinate(w, h, poly, heights, shrink):
    """
    Extract coordinates of regions corresponding to text lines in an image.

    Every polygon with a positive height is shrunk by ``height * shrink``,
    rasterised with ``fill_poly_keras`` and written into a label map using
    the label ``line_id + 1``, so each text line keeps its own region.
    Where shrunk polygons overlap, the later polygon wins.

    Args:
        w: Width of the label map.
        h: Height of the label map.
        poly: Iterable of polygons; each polygon is an iterable of rows
            whose first two entries are read as ``x`` and ``y``.
        heights: One height per polygon; polygons whose height is not
            positive are skipped.
        shrink: Factor multiplied by the height to obtain the shrink
            offset handed to ``shrink_polygan``.

    Returns:
        A list of NumPy arrays, one per label that still owns at least one
        pixel, in ascending label order. Each array has one ``(x, y)`` row
        per pixel. The list is empty when no region was drawn.
    """
    label_map = keras.ops.zeros((h, w), dtype="int32")
    labels = []

    for line_id, (p, height) in enumerate(zip(poly, heights)):
        if height <= 0:
            continue

        poly_points = [Point(row[0], row[1]) for row in p]
        shrunk_points = shrink_polygan(poly_points, height * shrink)
        shrunk_tensor = keras.ops.convert_to_tensor(
            [point.to_tuple() for point in shrunk_points], dtype="float32"
        )
        filled_polygon = fill_poly_keras(shrunk_tensor, (h, w))

        # Give every line its own label instead of merging the binary
        # masks; merging would collapse all lines into a single region.
        label = line_id + 1
        label_map = keras.ops.where(
            filled_polygon > 0,
            label * keras.ops.ones_like(label_map),
            label_map,
        )
        labels.append(label)

    regions_coords = []
    for label in labels:
        y, x = keras.ops.nonzero(keras.ops.equal(label_map, label))
        coords = keras.ops.convert_to_numpy(keras.ops.stack([x, y], axis=-1))
        # A label can vanish if its polygon rasterised to nothing or was
        # completely overwritten by later polygons.
        if coords.shape[0] > 0:
            regions_coords.append(coords)

    return regions_coords

0 commit comments

Comments
 (0)