add evaluation

wbw520 · wbw520 · commit 574e22286bfc · 2023-01-17T14:44:16.000+09:00
diff --git a/configs.py b/configs.py
@@ -6,9 +6,9 @@ def get_args_parser():
 
     # train settings
     parser.add_argument("--dataset", type=str, default="facade")
-    parser.add_argument("--model_name", type=str, default="PSPNet")
-    parser.add_argument("--pre_model", type=str, default="ViT-B_8.npz")
-    parser.add_argument("--batch_size", type=int, default=4,
+    parser.add_argument("--model_name", type=str, default="Segmenter")
+    parser.add_argument("--pre_model", type=str, default="ViT-B_16.npz")
+    parser.add_argument("--batch_size", type=int, default=1,
                         help="Number of images sent to the network in one step.")
     parser.add_argument("--root", type=str, default="/home/wangbowen/DATA/",
                         help="Path to the directory containing the image list.")
@@ -27,7 +27,7 @@ def get_args_parser():
     parser.add_argument("--weight_decay", type=float, default=1e-4, help="weight decay.")
 
     # VIT settings
-    parser.add_argument("--encoder", type=str, default="vit_base_patch8", help="name for encoder")
+    parser.add_argument("--encoder", type=str, default="vit_base_patch16", help="name for encoder")
     parser.add_argument("--decoder_embed_dim", type=int, default=512, help="dimension for decoder.")
     parser.add_argument("--decoder_depth", type=int, default=2, help="depth for decoder.")
     parser.add_argument("--decoder_num_head", type=int, default=8, help="head number for decoder.")
diff --git a/data/facade.py b/data/facade.py
@@ -31,11 +31,17 @@ def polygon2mask(self, img_size, polygons, rectangles):
         return mask
 
     # translate label_id to color img
-    def id2trainId(self, label):
+    def id2trainId(self, label, select=None):  # select: when not None, only the label id equal to it is painted
         w, h = label.shape
         label_copy = np.zeros((w, h, 3), dtype=np.uint8)
         for index, color in colors.items():
-            label_copy[label == index] = color
+            if select is not None:
+                if index == select:
+                    label_copy[label == index] = color
+                else:
+                    continue  # every other label id keeps the all-zero initial value
+            else:
+                label_copy[label == index] = color  # no filter given: paint every label id with its color
         return label_copy.astype(np.uint8)
 
 
diff --git a/data/get_data_set.py b/data/get_data_set.py
@@ -24,7 +24,7 @@ def get_data(args, evaluation_setting=None):
         else:
             current_set = "val"
 
-        val_set = facade.Facade(args, 'test', joint_transform=joint_transformations_val,
+        val_set = facade.Facade(args, current_set, joint_transform=joint_transformations_val,
                                             standard_transform=standard_transformations)
         ignore_index = facade.ignore_label
         args.num_classes = facade.num_classes
diff --git a/evaluation.py b/evaluation.py
@@ -38,5 +38,4 @@ def main():
     os.makedirs('demo/', exist_ok=True)
     parser = argparse.ArgumentParser('model training and evaluation script', parents=[get_args_parser()])
     args = parser.parse_args()
-    img_path = "/home/wangbowen/DATA/Facade/translated_data/images/IMG_1287.png"
     main()
diff --git a/inference.py b/inference.py
@@ -15,16 +15,16 @@
 import os
 
 
-def show_single(image, location=None, save=False):
+def show_single(image, location=None, save=False, name=None):  # name: path handed to plt.savefig when save is True
     # show single image
     image = np.array(image, dtype=np.uint8)
     plt.imshow(image)
 
     plt.axis('off')
     plt.subplots_adjust(top=1, bottom=0, left=0, right=1, hspace=0, wspace=0)
     plt.margins(0, 0)
-    # if save:
-    #     plt.savefig("demo/" + img_name, bbox_inches='tight', pad_inches=0)
+    if save:
+        plt.savefig(name, bbox_inches='tight', pad_inches=0)  # NOTE(review): name defaults to None; callers passing save=True should also pass name
     plt.show()
 
 
@@ -48,16 +48,18 @@ def main():
 
     standard_transformations = get_standard_transformations()
     img = Image.open(img_path).convert('RGB')
+
     img = img.resize((args.setting_size[1], args.setting_size[0]), Image.BILINEAR)
     img = standard_transformations(img).to(device, dtype=torch.float32)
     pred, full_pred = inference_sliding(args, model, img.unsqueeze(0))
-    color_img = PolygonTrans().id2trainId(torch.squeeze(pred, dim=0).cpu().detach().numpy())
-    show_single(color_img, save=True)
+    color_img = PolygonTrans().id2trainId(torch.squeeze(pred, dim=0).cpu().detach().numpy(), select=2)
+    print(color_img.shape)
+    show_single(color_img, save=True, name="color_mask2.png")
 
 
 if __name__ == '__main__':
     os.makedirs('demo/', exist_ok=True)
     parser = argparse.ArgumentParser('model training and evaluation script', parents=[get_args_parser()])
     args = parser.parse_args()
-    img_path = "/home/wangbowen/DATA/Facade/translated_data/images/IMG_1287.png"
+    img_path = "/home/wangbowen/DATA/Facade/translated_data/images/32052284_477d66a5ae_o.png"  # module-level global; main() opens this path
     main()
diff --git a/line_detection/connect_components.py b/line_detection/connect_components.py
@@ -0,0 +1,33 @@
+import cv2
+import numpy as np
+from inference import show_single
+
+
+img = cv2.imread("../demo/test.png")
+gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)  # binary threshold combined with the Otsu flag
+
+kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+bin_clo = cv2.erode(binary, kernel2, iterations=1)  # one erosion pass with the 5x5 rectangular kernel
+
+num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(bin_clo, connectivity=8)
+
+# print('num_labels = ', num_labels)
+# print('labels = ', labels)
+# # assign a different color to each connected component
+output = np.zeros((img.shape[0], img.shape[1], 3), np.uint8)
+
+for i in range(1, num_labels):  # label 0 is skipped
+    mask = labels == i
+    # connected-component stats: x, y, width, height and area of each region
+    print('stats = ', stats[i])
+    if stats[i][4] < 30:  # column 4 is the area; components under 30 pixels are ignored
+        continue
+    # centroid of the connected component
+    print('centroids = ', centroids[i])
+    output[:, :, 0][mask] = np.random.randint(0, 255)
+    output[:, :, 1][mask] = np.random.randint(0, 255)
+    output[:, :, 2][mask] = np.random.randint(0, 255)
+    break  # stops after the first component that passes the area filter
+
+show_single(output)
diff --git a/line_detection/hough.py b/line_detection/hough.py
@@ -0,0 +1,31 @@
+import cv2
+import numpy as np
+from inference import show_single
+
+src = cv2.imread("/home/wangbowen/DATA/Facade/translated_data/images/IMG_E1283.png")
+# src = cv2.imread("../demo/test.png")
+gray_img = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
+
+
+dst = cv2.equalizeHist(gray_img)
+# Gaussian blur for noise reduction
+gaussian = cv2.GaussianBlur(dst, (9, 9), 0)
+# cv.imshow("gaussian", gaussian)
+
+# edge detection
+edges = cv2.Canny(gaussian, 70, 150)
+show_single(edges)
+
+# Hough line detection
+# pay particular attention to the fourth parameter, the threshold: only accumulator values above it count as a line; it can also be read as the shortest detectable line length (in pixels)
+# in Hough space: the minimum number of sinusoids that must intersect at one point for it to count as a line
+lines = cv2.HoughLinesP(edges, 1, 1 * np.pi / 180, 10, minLineLength=10, maxLineGap=5)  # probabilistic Hough line transform: image matrix, the two polar-coordinate resolutions, minimum number of curve intersections for a line, minimum number of points forming a line, maximum gap between points considered to lie on the same line
+print("Line Num : ", len(lines))  # NOTE(review): HoughLinesP may return None when nothing is detected, in which case len() would raise — confirm
+
+# draw the detected segments
+for line in lines:
+    for x1, y1, x2, y2 in line:
+        cv2.line(src, (x1, y1), (x2, y2), (255, 0, 0), 2)
+    pass
+
+show_single(src)
diff --git a/line_detection/line_revision.py b/line_detection/line_revision.py
@@ -0,0 +1,215 @@
+import cv2
+import numpy as np
+import math
+from inference import show_single
+from shapely.geometry import LineString
+
+
+def lsd():  # run OpenCV's line segment detector on a hard-coded image; returns (segments, RGB image)
+    src = cv2.imread("/home/wangbowen/DATA/Facade/translated_data/images/32052284_477d66a5ae_o.png")
+    gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
+    src = cv2.cvtColor(src, cv2.COLOR_BGR2RGB)  # RGB version that is returned to the caller
+    gray = cv2.GaussianBlur(gray, (5, 5), 5)  # two successive blurs are applied before detection
+    gray = cv2.GaussianBlur(gray, (3, 3), 5)
+
+    LSD = cv2.createLineSegmentDetector(0)
+    dlines = LSD.detect(gray)
+
+    line_record = []
+
+    for dline in dlines[0]:  # only element 0 of the detect() result is used
+        x0 = int(round(dline[0][0]))  # endpoint coordinates are rounded to integers
+        y0 = int(round(dline[0][1]))
+        x1 = int(round(dline[0][2]))
+        y1 = int(round(dline[0][3]))
+        line_record.append([x0, y0, x1, y1])
+
+    return line_record, src  # ([[x0, y0, x1, y1], ...], RGB image)
+
+
+def calc_abc_from_line_2d(x0, y0, x1, y1):  # coefficients (a, b, c) of the line a*x + b*y + c = 0 through (x0, y0) and (x1, y1)
+    a = y0-y1
+    b = x1-x0
+    c = x0*y1-x1*y0
+    return a, b, c
+
+
+def get_line_cross_point(line1, line2):  # each argument is unpacked as (x0, y0, x1, y1); returns (x, y) or None
+    a0, b0, c0 = calc_abc_from_line_2d(*line1)
+    a1, b1, c1 = calc_abc_from_line_2d(*line2)
+    D = a0*b1-a1*b0  # determinant of the two line equations
+    if D == 0:  # zero determinant: no unique intersection point
+        return None
+    x = (b0*c1-b1*c0)/D
+    y = (a1*c0-a0*c1)/D
+    return x, y
+
+
+def combine(lines):  # intersect each entry's line with the next one; returns the list of intersections, or None
+    def get_line(j):  # returns [x0, y0, x1, y1] for entry j
+        if index[j] == 1:
+            current_line = [lines[j][2], lines[j][3], lines[j][4], lines[j][5]]  # endpoints are read from positions 2..5 of the entry
+        else:
+            if j == 0 or j == 2:
+                current_line = [5, lines[j][0], 20, lines[j][0]]  # synthetic horizontal segment at y = lines[j][0]
+            else:
+                current_line = [lines[j][0], 20, lines[j][0], 40]  # synthetic vertical segment at x = lines[j][0]
+        return current_line
+
+    index = []  # 0 for a one-element entry, 1 otherwise
+    for i in range(len(lines)):
+        if len(lines[i]) == 1:
+            index.append(0)
+        else:
+            index.append(1)
+
+    if np.array(index).sum() < 2:  # fewer than two entries flagged 1: nothing is computed
+        return None
+
+    cross_record = []
+    start_line = None
+    for s in range(len(index)):
+        p_current_line = get_line(s)
+        if s == 0:
+            start_line = p_current_line
+
+        if s == 3:  # position 3 is paired with the first line; the value 3 is hard-coded
+            p_next_line = start_line
+        else:
+            p_next_line = get_line(s+1)
+        cross_point = get_line_cross_point(p_current_line, p_next_line)
+        cross_record.append(cross_point)  # stored as-is, so a None from get_line_cross_point ends up in the list
+    return cross_record
+
+
+def line_search(enhance_stats, lines):  # for each side of the box (x, y, w, h), select candidate segments from lines
+    x, y, w, h = enhance_stats
+    top = LineString([(x, y), (x + w, y)])
+    bottom = LineString([(x, y + h), (x + w, y + h)])
+    left = LineString([(x, y), (x, y + h)])
+    right = LineString([(x + w, y), (x + w, y + h)])
+
+    line_list = {"top": top, "left": left, "bottom": bottom, "right": right}
+    distance_thresh = 10  # segments farther than this from a side are ignored
+    degree_thresh = 0.1  # tolerance used when comparing the angles of two candidates (values come from math.atan2)
+    max_selection = 1  # number of candidates kept per side
+    record = {"top": [], "left": [], "bottom": [], "right": []}
+
+    for key, value in line_list.items():
+        # print(key)
+        for (x0, y0, x1, y1) in lines:
+            current_line = LineString([(x0, y0), (x1, y1)])
+            current_degree = math.atan2(y0 - y1, x0 - x1)
+            current_dis = value.distance(current_line)
+            line_len = (x0 - x1)**2 + (y0 - y1)**2  # squared length; compared against squared box sizes below
+
+            if current_dis > distance_thresh:
+                continue
+
+            # print([current_dis, current_degree, x0, y0, x1, y1])
+
+            if key == "top" or key == "bottom":
+                if math.pi * 1/5 < abs(current_degree) < math.pi * 4/5:  # angle rejected for a horizontal side
+                    continue
+                if line_len > w**2 * 1.5 or line_len < w**2 / 3:  # squared length must lie within [w**2 / 3, 1.5 * w**2]
+                    continue
+            else:
+                if abs(current_degree) < math.pi * 1/3 or abs(current_degree) > math.pi * 2/3:  # angle rejected for a vertical side
+                    continue
+                if line_len > h**2 * 1.5 or line_len < h**2 / 3:  # squared length must lie within [h**2 / 3, 1.5 * h**2]
+                    continue
+
+            status = True  # stays True unless a stored candidate with a similar angle is found
+            for i in range(len(record[key])):
+                if abs(abs(abs(current_degree) - math.pi/2) - abs(abs(record[key][i][1]) - math.pi/2)) < degree_thresh:
+                    if record[key][i][0] > current_dis:  # the closer of the two candidates is kept
+                        record[key][i] = [current_dis, current_degree, x0, y0, x1, y1]
+                    status = False
+
+            if status:
+                record[key].append([current_dis, current_degree, x0, y0, x1, y1])
+
+    final_line = []
+    for key2, value2 in record.items():
+        value2.sort(key=lambda s: s[0], reverse=False)  # ascending by distance to the side
+        num = min(len(value2), max_selection)
+        if num == 0:  # no candidate: fall back to a one-element entry holding that side's box coordinate
+            if key2 == "top":
+                final_line.append([y])
+            elif key2 == "bottom":
+                final_line.append([y + h])
+            elif key2 == "left":
+                final_line.append([x])
+            else:
+                final_line.append([x + w])
+            continue
+
+        for j in range(num):
+            final_line.append(value2[j])
+
+    return final_line  # entries follow record's key order: top, left, bottom, right
+
+
+def revision():  # for each connected component of the mask, draw the polygon from combine() onto the image returned by lsd()
+    img = cv2.imread("../demo/test.png")
+    img = cv2.resize(img, (2048, 1152))  # NOTE(review): presumably matches the size of the image loaded in lsd() — confirm
+    img_orl = img  # not referenced again in this function
+
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
+    kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+    bin_clo = cv2.erode(binary, kernel1, iterations=1)
+    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(bin_clo, connectivity=8)
+
+    # print('num_labels = ', num_labels)
+    # print('labels = ', labels)
+    # # assign a different color to each connected component
+    lines, scr = lsd()
+
+    for i in range(1, num_labels):  # label 0 is skipped
+        # if i < 10:
+        #     continue
+        mask = labels == i
+        # connected-component stats: x, y, width, height and area of each region
+        if stats[i][4] < 100:  # components with an area under 100 pixels are ignored
+            continue
+        # # connected-component stats: x, y, width, height and area of each region
+        # print('stats = ', stats[i])
+        # # centroid of the connected component
+        # print('centroids = ', centroids[i])
+
+        current_patch = np.zeros((img.shape[0], img.shape[1]), np.uint8)
+        current_patch[mask] = 255
+        # show_single(current_patch)
+
+        # kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
+        # current_patch = cv2.dilate(current_patch, kernel2, iterations=1)
+        # show_single(current_patch)
+
+        x, y, w, h = cv2.boundingRect(current_patch)
+        # print(x, y, w, h)
+        # cv2.rectangle(current_patch, (x, y), (x + w, y + h), (225, 0, 255), 2)
+        # show_single(current_patch)
+
+        detect_lines = line_search([x, y, w, h], lines)
+        final_point = combine(detect_lines)
+
+        if final_point is None:
+            continue
+
+        start_x, start_y = None, None
+        for w in range(len(final_point)):  # note: reuses the name w, which held the box width above
+            x0, y0 = round(final_point[w][0]), round(final_point[w][1])  # NOTE(review): combine() can store None entries; indexing one would raise here
+            if w == 0:
+                start_x, start_y = x0, y0
+            if w == 3:  # the point at position 3 is joined back to the first point
+                x1, y1 = start_x, start_y
+            else:
+                x1, y1 = round(final_point[w+1][0]), round(final_point[w+1][1])
+            cv2.line(scr, (x0, y0), (x1, y1), 255, 2, cv2.LINE_AA)
+
+    show_single(scr, save=True, name="lines_detect.png")
+
+
+if __name__ == '__main__':  # script entry point
+    revision()
diff --git a/line_detection/lsd.py b/line_detection/lsd.py
diff --git a/utils/engine.py b/utils/engine.py