diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..a43649a Binary files /dev/null and b/.DS_Store differ diff --git a/ProposalGeneration/ConvertBox4dToBox8d.m b/ProposalGeneration/ConvertBox4dToBox8d.m new file mode 100644 index 0000000..f153d2f --- /dev/null +++ b/ProposalGeneration/ConvertBox4dToBox8d.m @@ -0,0 +1,12 @@ +function ret_box = ConvertBox4dToBox8d( box ) + x1 = box(1); + y1 = box(2); + x2 = box(1) + box(3) - 1; + y2 = box(2); + x3 = box(1) + box(3) - 1; + y3 = box(2) + box(4) - 1; + x4 = box(1); + y4 = box(2) + box(4) - 1; + ret_box = [y1, x1; y2, x2; y3, x3; y4, x4]; +end + diff --git a/ProposalGeneration/ConvertBox8dToBox4d.m b/ProposalGeneration/ConvertBox8dToBox4d.m new file mode 100644 index 0000000..4538bc9 --- /dev/null +++ b/ProposalGeneration/ConvertBox8dToBox4d.m @@ -0,0 +1,7 @@ +function retBox = ConvertBox8dToBox4d( bbox_eightPixel ) + retBox = [min(bbox_eightPixel(2,:)), ... + min(bbox_eightPixel(1,:)), ... + max(bbox_eightPixel(2,:)) - min(bbox_eightPixel(2,:)) + 1, ... 
+ max(bbox_eightPixel(1,:)) - min(bbox_eightPixel(1,:)) + 1]; +end + diff --git a/ProposalGeneration/box_nms.m b/ProposalGeneration/box_nms.m new file mode 100644 index 0000000..beca528 --- /dev/null +++ b/ProposalGeneration/box_nms.m @@ -0,0 +1,20 @@ +function nms_idx = box_nms(boxes, scores, area_threashold) + [~, sort_idx] = sort(scores, 'descend'); + boxes = boxes(sort_idx, :); + area = boxes(:,3) .* boxes(:,4); + + nms_flag = ones(size(boxes, 1), 1); + for i = 1 : size(boxes,1) + if(nms_flag(i) == 0) + continue; + end + + int_area = rectint(boxes(i, :), boxes)'; + area_ratio = int_area ./ (area + area(i) - int_area); + area_ratio(1 : i) = 0; + nms_flag(area_ratio > area_threashold) = 0; + end + + nms_idx = sort_idx(logical(nms_flag)); +end + diff --git a/ProposalGeneration/computeMergedBox.m b/ProposalGeneration/computeMergedBox.m new file mode 100644 index 0000000..7d1c5b4 --- /dev/null +++ b/ProposalGeneration/computeMergedBox.m @@ -0,0 +1,20 @@ +function [ rectX, rectY, area, perimeter, sideLength] = computeMergedBox( boxes, append_pixels ) + nBoxes = size(boxes, 1); + compPoints = zeros(nBoxes* 4, 2); + for nComp = 1 : size(boxes,1) + loopBox = boxes(nComp, :); + compPoints((nComp - 1) * 4 + 1, :) = [loopBox(2), loopBox(1)]; + compPoints((nComp - 1) * 4 + 2, :) = [loopBox(2), loopBox(1) + loopBox(3) - 1]; + compPoints((nComp - 1) * 4 + 3, :) = [loopBox(2) + loopBox(4) - 1, loopBox(1) + loopBox(3) - 1]; + compPoints((nComp - 1) * 4 + 4, :) = [loopBox(2) + loopBox(4) - 1, loopBox(1)]; + end + + if(nargin == 2) + compPoints = cat(1, compPoints, append_pixels); + end + + [rectX, rectY, area, perimeter] = minboundrect(compPoints(:, 2), compPoints(:, 1)); + + sideLength = ((rectX(2 : 5) - rectX(1 : 4)) .^ 2 + (rectY(2 : 5) - rectY(1 : 4)).^2).^0.5; +end + diff --git a/ProposalGeneration/computeMergedBoxByOrientation.m b/ProposalGeneration/computeMergedBoxByOrientation.m new file mode 100644 index 0000000..359e71e --- /dev/null +++ 
b/ProposalGeneration/computeMergedBoxByOrientation.m @@ -0,0 +1,45 @@ +function [ rectX, rectY, area, perimeter, sideLength] = computeMergedBoxByOrientation(img, pixels, estimated_orientation) + %% Compute convex hull + DT = delaunayTriangulation(pixels(:, 2), pixels(:, 1)); + convexPointIdx = convexHull(DT); + convexPoints = DT.Points(convexPointIdx, :); + convexPoints = convexPoints(:, [2 1]); +% imshow(img); +% hold on; +% plot(convexPoints(:, 2), convexPoints(:, 1)); +% hold off; + + %% Compute centralized pixels + cx = mean(convexPoints(:, 2)); + cy = mean(convexPoints(:, 1)); + + centralizedPixels = pixels; + centralizedPixels(:, 1) = centralizedPixels(:, 1) - cy; + centralizedPixels(:, 2) = centralizedPixels(:, 2) - cx; + + %% compute rotate pixels + rotate_mat = [cosd(estimated_orientation), -sind(estimated_orientation); sind(estimated_orientation), cosd(estimated_orientation)]; + centralizedPixels = centralizedPixels(:, [2, 1]); + rotatePixels = (rotate_mat * centralizedPixels')'; + + %% swap X and Y + rotatePixels = rotatePixels(:, [2, 1]); + centralizedPixels = centralizedPixels(:, [2, 1]); + + %% compute min_box of rotate pixels + rotate_min_box = [min(rotatePixels(:,2)), ... + min(rotatePixels(:,1)), ... + max(rotatePixels(:,2)) - min(rotatePixels(:,2)), ... 
+ max(rotatePixels(:,1)) - min(rotatePixels(:,1))]; + rotate_min_box8p = ConvertBox4dToBox8d(rotate_min_box); + rotate_min_box8p = getRotateBox8D(rotate_min_box8p, -estimated_orientation, 0, 0); + + + + rectX = [rotate_min_box8p(:, 2); rotate_min_box8p(1, 2)] + cx; + rectY = [rotate_min_box8p(:, 1); rotate_min_box8p(1, 1)] + cy; + + area = polyarea(rectX, rectY); + sideLength = ((rectY(2 : 5) - rectY(1 : 4)) .^ 2 + (rectX(2 : 5) - rectX(1 : 4)) .^ 2) .^ 0.5; + perimeter = sum(sideLength); +end diff --git a/ProposalGeneration/computeOrientationDiff.m b/ProposalGeneration/computeOrientationDiff.m new file mode 100644 index 0000000..cf28c6a --- /dev/null +++ b/ProposalGeneration/computeOrientationDiff.m @@ -0,0 +1,10 @@ +function orientationDiff = computeOrientationDiff(pixels, relativePixel, estimated_orientation) + diffVector = zeros(size(pixels,1), 2); + diffVector(:, 1) = pixels(:, 1) - relativePixel(1); + diffVector(:, 2) = pixels(:, 2) - relativePixel(2); + + atanDiff = - atan(diffVector(: ,1) ./ (diffVector(:, 2) + eps)) * 180 / pi; + + orientationDiff = abs(atanDiff - estimated_orientation); + orientationDiff = min(180 - orientationDiff, orientationDiff); +end diff --git a/ProposalGeneration/estimateOrientation.m b/ProposalGeneration/estimateOrientation.m new file mode 100644 index 0000000..c622e24 --- /dev/null +++ b/ProposalGeneration/estimateOrientation.m @@ -0,0 +1,160 @@ +function orientation = estimateOrientation(img, region, region_comp_infos, orient_param) + global param + + %% return if region_comp_infos is empty + if isempty(region_comp_infos) + orientation = NaN; + return + end + + if length(region_comp_infos) == 1 + orientation = 0; + return; + end + + if(orient_param.minOrientation == orient_param.maxOrientation) + orientation = 0; + return; + end + + box8d = cell(length(region_comp_infos), 1); + for i = 1 : length(region_comp_infos) + box8d{i} = ConvertBox4dToBox8d(region_comp_infos{i}.box); + end + + if(false && param.debug) + debug_box = 
zeros(length(region_comp_infos), 4); + for d_i = 1 : length(region_comp_infos) + debug_box(d_i,:) = region_comp_infos{d_i}.box; + end + show_bbox(img, debug_box); + end + + region_cx = mean(region(:,2)); + region_cy = mean(region(:,1)); + + orientations = [orient_param.minOrientation : orient_param.orientationInterval : orient_param.maxOrientation]; + hitBoxCount = zeros(length(orientations), 1); + + + y1_arr = zeros(length(region_comp_infos), 1); + y2_arr = zeros(length(region_comp_infos), 1); + for i = 1 : length(orientations) + + for j = 1 : length(region_comp_infos) + rotate_box8d = getRotateBox8D(box8d{j}, orientations(i), region_cx, region_cy); + + y1_arr(j) = min(rotate_box8d(:,1)); + y2_arr(j) = max(rotate_box8d(:,1)); + + y_offest_arr = y2_arr(j) - y1_arr(j) + 1; + y1_arr(j) = y1_arr(j) + floor(0.3 * y_offest_arr); + y2_arr(j) = y2_arr(j) - floor(0.3 * y_offest_arr); + end + + min_y_arr = min(y1_arr); + y_map = zeros(max(y2_arr) - min_y_arr + 1, 1); + for j = 1 : length(region_comp_infos) + y_map(y1_arr(j) - min_y_arr + 1 : y2_arr(j) - min_y_arr + 1) = ... 
+ y_map(y1_arr(j) - min_y_arr + 1 : y2_arr(j) - min_y_arr + 1) + 1; + end + + hitBoxCount(i) = max(y_map); + + if false && param.debug + imshow(img); + hold on; + for k = 1 : length(region_comp_infos) + cy = mean(box8d{k}(:,1)); + cx = mean(box8d{k}(:,2)); + + plot(cx,cy, '*', 'color', 'y'); + + rotate_box8d = getRotateBox8D(box8d{k}, orientations(i), region_cx, region_cy); + cy = mean(rotate_box8d(:,1)); + cx = mean(rotate_box8d(:,2)); + plot(cx,cy, '*', 'color', 'r'); + + plot([rotate_box8d(:, 2); rotate_box8d(1, 2)], [rotate_box8d(:, 1); rotate_box8d(1, 1)]); + end + hold off; + end + end + + max_hit_box = max(hitBoxCount); + + %% calc_continues + continueScore = zeros(length(hitBoxCount), 1); + if(hitBoxCount(length(hitBoxCount)) == max_hit_box) + continueScore(length(hitBoxCount)) = 1; + end + + for i = length(hitBoxCount) - 1 : -1 : 1 + if(hitBoxCount(i) == max_hit_box) + continueScore(i) = continueScore(i+1) + 1; + else + continueScore(i) = 0; + end + end + + left_most = find(continueScore == max(continueScore)); + if(length(left_most) > 1) + [~, middlest] = min(abs(left_most - length(hitBoxCount))); + left_most = left_most(middlest); + end + + right_most = left_most; + len_max_hit_box = 1; + + while true + i = left_most - 1; + if(i == 0) + i = length(hitBoxCount); + end + if(hitBoxCount(i) == max_hit_box && right_most ~= i) + left_most = i; + len_max_hit_box = len_max_hit_box + 1; + else + break; + end + end + + while true + i = right_most + 1; + if(i >= length(hitBoxCount)) + i = 1; + end + if(hitBoxCount(i) == max_hit_box && left_most ~= i) + right_most = i; + len_max_hit_box = len_max_hit_box + 1; + else + break; + end + end + + max_orientation_idx = left_most + round(len_max_hit_box/2); + if max_orientation_idx > length(hitBoxCount) + max_orientation_idx = max_orientation_idx - length(hitBoxCount); + end + orientation = orientations(max_orientation_idx); + + if param.debug + imshow(img); + hold on; + for k = 1 : length(region_comp_infos) + cy = 
mean(box8d{k}(:,1)); + cx = mean(box8d{k}(:,2)); + + plot(cx,cy, '*', 'color', 'y'); + + rotate_box8d = getRotateBox8D(box8d{k}, orientation, region_cx, region_cy); + + cy = mean(rotate_box8d(:,1)); + cx = mean(rotate_box8d(:,2)); + + plot(cx,cy, '*', 'color', 'r'); + end + hold off; + end +end + diff --git a/ProposalGeneration/findWidthAndHeight.m b/ProposalGeneration/findWidthAndHeight.m new file mode 100644 index 0000000..e3bfa37 --- /dev/null +++ b/ProposalGeneration/findWidthAndHeight.m @@ -0,0 +1,15 @@ +function [width, height] = findWidthAndHeight(rectX, rectY, sideLenght, estimated_orientation) + pixels = zeros(4, 2); + for n = 1 : 4 + pixels(n, :) = [(rectY(n) + rectY(n+1))/2, (rectX(n) + rectX(n+1))/2]; + end + orientationDiff1 = computeOrientationDiff(pixels(1, :), pixels(3, :), estimated_orientation); + orientationDiff2 = computeOrientationDiff(pixels(2, :), pixels(4, :), estimated_orientation); + if(orientationDiff1 > orientationDiff2) + width = round(0.5 * (sideLenght(1) + sideLenght(3))); + height = round(0.5 * (sideLenght(2) + sideLenght(4))); + else + width = round(0.5 * (sideLenght(2) + sideLenght(4))); + height = round(0.5 * (sideLenght(1) + sideLenght(3))); + end +end diff --git a/ProposalGeneration/genProposalsByOrientation.m b/ProposalGeneration/genProposalsByOrientation.m new file mode 100644 index 0000000..91dcce0 --- /dev/null +++ b/ProposalGeneration/genProposalsByOrientation.m @@ -0,0 +1,228 @@ +function proposals = genProposalsByOrientation(img, region, regionPerim, estimated_orientation, region_comp_infos) + global param; + + if(isnan(estimated_orientation)) + proposals = zeros(0, 9); + return + end + + [comp_cluster_ind, ~] = compCluster(img, region_comp_infos, ... + estimated_orientation, ... + param.minCompHeightSimilarity, ... + param.maxCompOrientationDiff, ... 
+ param.minIoUDiff); + + clusterIdx = unique(comp_cluster_ind); + clusterCount = length(clusterIdx); + proposals = zeros(clusterCount, 9); + confident_idx = false(clusterCount, 1); + + for c = 1 : clusterCount + clusterCompInfos = region_comp_infos(comp_cluster_ind == clusterIdx(c)); + if(length(clusterCompInfos) > 0) + confident_idx(c) = true; + proposals(c, 1 : 8) = getProposal(img, region, regionPerim, clusterCompInfos, estimated_orientation); + proposals(c, 9) = estimated_orientation; + end + end + + proposals = proposals(confident_idx, :); + + %% show proposals + if true && param.debug + imshow(img); + hold on; + for i = 1 : clusterCount + x_arr = [proposals(i, 1 : 2 : 8), proposals(i, 1)]; + y_arr = [proposals(i, 2 : 2 : 8), proposals(i, 2)]; + + plot(x_arr, y_arr, 'color', rand(3,1)); + end + hold off; + end + + %% show clusters + if false && param.debug + debug_bbox_gathered = zeros(0, 4); + color_gathered = zeros(0, 3); + for c = 1: clusterCount + clusterCompInfos = region_comp_infos(comp_cluster_ind == clusterIdx(c)); + debug_bbox = zeros(length(clusterCompInfos), 4); + color = repmat(rand(1,3), length(clusterCompInfos), 1); + + for d_i = 1 : length(clusterCompInfos) + debug_bbox(d_i,:) = clusterCompInfos{d_i}.box; + end + + debug_bbox_gathered = cat(1, debug_bbox_gathered, debug_bbox); + color_gathered = cat(1, color_gathered, color); + end + show_bbox(img, debug_bbox_gathered, color_gathered); + end +end + +function [comp_cluster_ind, clusterCount] = compCluster(img, region_comp_infos, estimated_orientation, minCompHeightSimilarity, maxCompOrientationDiff, minIoUDiff) + global param; + + compCount = length(region_comp_infos); + + if(compCount == 0) + comp_cluster_ind = zeros(0, 1); + clusterCount = 0; + return; + end + + if(compCount == 1) + comp_cluster_ind = ones(1,1); + clusterCount = 1; + return; + end + + comp_cluster_ind = zeros(compCount, 1, 'uint16'); + clusterCount = 0; + + box4d = zeros(compCount, 4); + heights = zeros(compCount, 1); + + 
boxCenter = zeros(compCount, 2); + boxCenter_U = zeros(compCount, 2); + boxCenter_D = zeros(compCount, 2); + boxCenter_L = zeros(compCount, 2); + boxCenter_R = zeros(compCount, 2); + + for i = 1 : compCount + box4d(i, :) = region_comp_infos{i}.box; + [rectX, rectY, ~, ~, sideLenght] = computeMergedBox(box4d(i, :)); + [~, heights(i)] = findWidthAndHeight(rectX, rectY, sideLenght, estimated_orientation); + + boxCenter(i, 2) = round(box4d(i, 1) + 0.5*box4d(i, 3)); + boxCenter(i, 1) = round(box4d(i, 2) + 0.5*box4d(i, 4)); + + boxCenter_U(i, 2) = round(box4d(i, 1) + 0.5*box4d(i, 3)); + boxCenter_U(i, 1) = box4d(i, 2); + + boxCenter_D(i, 2) = round(box4d(i, 1) + 0.5*box4d(i, 3)); + boxCenter_D(i, 1) = box4d(i,2) + box4d(i,4) - 1; + + boxCenter_L(i, 2) = box4d(i,1); + boxCenter_L(i, 1) = round(box4d(i, 2) + 0.5*box4d(i, 4)); + + boxCenter_R(i, 2) = box4d(i,1) + box4d(i,3) - 1; + boxCenter_R(i, 1) = round(box4d(i, 2) + 0.5*box4d(i, 4)); + end + + heightSimilarity = zeros(compCount); + IoUDiff = zeros(compCount); + orientationDiffOfBox = zeros(compCount); + dists = zeros(compCount); + + for i = 1 : compCount + + %% Compute height similarity + heightSimilarity(i,:) = min( ... + min(box4d(i, 4), box4d(:, 4)) ./ max(box4d(i, 4), box4d(:, 4)), ... 
+ min(box4d(i, 3), box4d(:, 3)) ./ max(box4d(i, 3), box4d(:, 3))); + %% Compute IoU diff + intArea = rectint(box4d(i,:), box4d); + unionArea = box4d(i,3) * box4d(i,4) + box4d(:, 3).* box4d(:, 4); + IoUDiff(i, :) = intArea ./ unionArea'; + + %% Compute orientation diff of box + orientationDiff_U = computeOrientationDiff(boxCenter_U, boxCenter_U(i,:), estimated_orientation); + orientationDiff_D = computeOrientationDiff(boxCenter_D, boxCenter_D(i,:), estimated_orientation); + orientationDiff_L = computeOrientationDiff(boxCenter_L, boxCenter_L(i,:), estimated_orientation); + orientationDiff_R = computeOrientationDiff(boxCenter_R, boxCenter_R(i,:), estimated_orientation); + orientationDiffOfBox(i, :) = min(min(orientationDiff_U, orientationDiff_D), ... + min(orientationDiff_L, orientationDiff_R)); + + %% Compute dist + dists(i, :) = ((boxCenter(:, 1) - boxCenter(i, 1)).^2 + (boxCenter(:, 2) - boxCenter(i,2)).^2).^0.5; + end + + if true && param.debug + imshow(img); + for i = 1 : compCount + rectangle('position', box4d(i, :), 'edgecolor','y'); + end + end + + for i = 1 : compCount + if comp_cluster_ind(i) == 0 + clusterCount = clusterCount + 1; + current_cluster_comp_ind = false(compCount, 1); + current_cluster_comp_ind(i) = 1; + + isUpdate = true; + while isUpdate + isUpdate = false; + + %% Compute boxCenter of merged bbox + mergedBoxCenter_U = [mean(box4d(current_cluster_comp_ind, 2)), ... + mean(box4d(current_cluster_comp_ind, 1) + 0.5*box4d(current_cluster_comp_ind, 3))]; + + mergedBoxCenter_D = [mean(box4d(current_cluster_comp_ind, 2) + box4d(current_cluster_comp_ind, 4) - 1), ... + mean(box4d(current_cluster_comp_ind, 1) + 0.5*box4d(current_cluster_comp_ind, 3))]; + + mergedBoxCenter_L = [mean(box4d(current_cluster_comp_ind, 2) + 0.5*box4d(current_cluster_comp_ind, 4)), ... + mean(box4d(current_cluster_comp_ind, 1))]; + + mergedBoxCenter_R = [mean(box4d(current_cluster_comp_ind, 2) + 0.5*box4d(current_cluster_comp_ind, 4)), ... 
+ mean(box4d(current_cluster_comp_ind, 1) + box4d(current_cluster_comp_ind, 3) - 1)]; + + %% Compute orientation diff + orientationDiff_U = computeOrientationDiff(boxCenter_U, mergedBoxCenter_U, estimated_orientation); + orientationDiff_D = computeOrientationDiff(boxCenter_D, mergedBoxCenter_D, estimated_orientation); + orientationDiff_L = computeOrientationDiff(boxCenter_L, mergedBoxCenter_L, estimated_orientation); + orientationDiff_R = computeOrientationDiff(boxCenter_R, mergedBoxCenter_R, estimated_orientation); + + orientationDiff = min(min(orientationDiff_U, orientationDiff_D), min(orientationDiff_L, orientationDiff_R))'; + + heightSimilarity_rule = heightSimilarity(current_cluster_comp_ind,:) > minCompHeightSimilarity; + orientationDiff_rule = orientationDiff < maxCompOrientationDiff; + IoUDiff_rule = IoUDiff(current_cluster_comp_ind, :) > minIoUDiff; + orientationDiffOfBox_rule = heightSimilarity_rule ... + & (orientationDiffOfBox(current_cluster_comp_ind, :) < maxCompOrientationDiff) ... + & (dists(current_cluster_comp_ind, :) < param.maxDistRatio * mean(box4d(current_cluster_comp_ind, 4))); + + spatial_rule = ((sum(heightSimilarity_rule, 1) > 0) & sum(orientationDiffOfBox_rule, 1) > 0) ... + | (sum(IoUDiff_rule, 1) > 0) ... + | (orientationDiff_rule & (sum(heightSimilarity_rule, 1) > 0)); + %spatial_rule = sum((heightSimilarity_rule & orientationDiff_rule) | IoUDiff_rule, 1) > 0; + + satisfiedComp = spatial_rule' ... + & (~current_cluster_comp_ind); + + satisfiedIdx = find(satisfiedComp); + if(~isempty(satisfiedIdx)) + isUpdate = true; + current_cluster_comp_ind(satisfiedIdx) = 1; +% x1_tmp = min(min(box4d(satisfiedIdx, 1)), ... +% mergedBox(1)); +% x2_tmp = max(max(box4d(satisfiedIdx, 1) + box4d(satisfiedIdx, 3) - 1), ... +% mergedBox(1) + mergedBox(3) - 1); +% y1_tmp = min(min(box4d(satisfiedIdx, 2)), ... +% mergedBox(2)); +% y2_tmp = max(max(box4d(satisfiedIdx, 2) + box4d(satisfiedIdx, 4) - 1), ... 
+% mergedBox(2) + mergedBox(4) - 1); +% mergedBox(1) = x1_tmp; +% mergedBox(2) = y1_tmp; +% mergedBox(3) = x2_tmp - x1_tmp + 1; +% mergedBox(4) = y2_tmp - y1_tmp + 1; + + + if true && param.debug + clusterCompInfos = region_comp_infos(current_cluster_comp_ind); + color = zeros(length(clusterCompInfos), 3); + color(:, 2) = 1; + color(1, :) = [1, 0, 0]; + imshow(img); + for d_i = 1 : length(clusterCompInfos) + rectangle('position', clusterCompInfos{d_i}.box, 'edgecolor', color(d_i,:)); + end + end + end + end + comp_cluster_ind(current_cluster_comp_ind) = clusterCount; + end + end +end diff --git a/ProposalGeneration/gen_proposals.m b/ProposalGeneration/gen_proposals.m new file mode 100644 index 0000000..51a16f7 --- /dev/null +++ b/ProposalGeneration/gen_proposals.m @@ -0,0 +1,122 @@ +function proposals = gen_proposals(img, map, resizeRatio, minmaxRF) + global param + global globalVar + + %% bw prob map + f = fspecial('gaussian',[5 5],7); + map = imfilter(map,f,'same'); + + bwmap = map > param.minRegionProb; + + SE1=strel('disk',3); + bwmap = imerode(bwmap,SE1); + bwmap = imdilate(bwmap,SE1); + bwmap = bwmap>0; + bwmap = imfill(bwmap,'holes'); + + + %% process each region + [L, L_num] = bwlabel(bwmap, 8); + proposals = cell(L_num, 1); + estimated_orientations = zeros(L_num, 1); + regions = cell(L_num, 1); + regionPerims = cell(L_num, 1); + totalTimeOfGetCompOfRegion = 0; + totalTimeOfEstimateOrientation = 0; + totalTimeOfGenProposalsByOrientation = 0; + for i = 1 : L_num + regionMap = L == i; + + %% construct regions + [y, x] = find(regionMap); + regions{i} = [y, x]; + + %% Substruct sub img and sub region map by regions + regionMinX= min(x); + regionMaxX = max(x); + regionMinY = min(y); + regionMaxY = max(y); + regionHeight = regionMaxY - regionMinY + 1; + regionWidth = regionMaxX - regionMinX + 1; + + extendRegionMinX = max(1, regionMinX - regionWidth * 0.005); + extendRegionMaxX = min(size(img, 2), regionMaxX + regionWidth * 0.005); + extendRegionMinY = max(1, 
regionMinY - regionHeight * 0.005); + extendRegionMaxY = min(size(img, 1), regionMaxY + regionHeight * 0.005); + + subImg = img(extendRegionMinY : extendRegionMaxY, extendRegionMinX : extendRegionMaxX, :); + subRegionMap = regionMap(extendRegionMinY : extendRegionMaxY, extendRegionMinX : extendRegionMaxX); + [subY, subX] = find(subRegionMap); + subRegions = [subY, subX]; + + %% Get comp info from subImg + %comp_infos = normal_mser3(subImg, param.mser_info, resizeRatio, minmaxRF); + comp_infos = normal_mser(subImg, resizeRatio, minmaxRF, [globalVar.imgName, '_', num2str(i)]); + + %% ---- for debug ---- + if(param.debug) + boxes = zeros(length(comp_infos), 4); + for n = 1 : length(comp_infos) + boxes(n,:) = comp_infos{n}.box; + end + show_bbox(subImg, boxes); + end + + %% construct regionPerims + subRegionMap = imfill(subRegionMap, 'holes'); + perimMap = bwperim(subRegionMap); + [y, x] = find(perimMap); + subRegionPerims = [y, x]; + + tic; + [region_comp_infos] = getCompOfRegion(subImg, subRegions, comp_infos, param.minRegionCompCoveredArea, param.maxRegionCompArea); + tmpTime = toc; + totalTimeOfGetCompOfRegion = totalTimeOfGetCompOfRegion + tmpTime; + + %% generate secondary_region_comp_infos +% [~, secondary_region_comp_idx] = getCompOfRegion(img, regions{i}, comp_infos, param.secondaryMinRegionCompArea); +% secondary_region_comp_flags = false(length(comp_infos), 1); +% secondary_region_comp_flags(secondary_region_comp_idx) = true; +% secondary_region_comp_flags(region_comp_idx) = false; +% secondary_region_comp_infos = num2cell(comp_infos(secondary_region_comp_flags)); + + if(param.debug) + debug_bbox = zeros(length(region_comp_infos), 4); + for d_i = 1 : length(region_comp_infos) + debug_bbox(d_i,:) = region_comp_infos{d_i}.box; + end + show_bbox(subImg, debug_bbox); + end + + if(false && param.debug) + debug_bbox = zeros(length(secondary_region_comp_infos), 4); + for d_i = 1 : length(secondary_region_comp_infos) + debug_bbox(d_i,:) = 
secondary_region_comp_infos{d_i}.box; + end + show_bbox(subImg, debug_bbox); + end + + tic; + estimated_orientations(i) = estimateOrientation(subImg, subRegions, region_comp_infos, param.orient_param); + tmpTime = toc; + totalTimeOfEstimateOrientation = totalTimeOfEstimateOrientation + tmpTime; + + tic; + proposals{i} = genProposalsByOrientation(subImg, subRegions, subRegionPerims, estimated_orientations(i), region_comp_infos); + tmpTime = toc; + totalTimeOfGenProposalsByOrientation = totalTimeOfGenProposalsByOrientation + tmpTime; + + proposals{i}(:, 1 : 2 : 8) = proposals{i}(:, 1 : 2 : 8) + extendRegionMinX; + proposals{i}(:, 2 : 2 : 8) = proposals{i}(:, 2 : 2 : 8) + extendRegionMinY; + end + + if(false) + fprintf('TotalTime: %.2f, %.2f, %.2f\n', ... + totalTimeOfGetCompOfRegion, ... + totalTimeOfEstimateOrientation, ... + totalTimeOfGenProposalsByOrientation); + end + + proposals = cell2mat(proposals); +end + diff --git a/ProposalGeneration/getCompOfRegion.m b/ProposalGeneration/getCompOfRegion.m new file mode 100644 index 0000000..7efc966 --- /dev/null +++ b/ProposalGeneration/getCompOfRegion.m @@ -0,0 +1,41 @@ +function [ region_comp, region_comp_idx ] = getCompOfRegion(img, region, comp_infos, minRegionCompCoveredArea, maxRegionCompArea) + %Get components which inside in the region + global param; + + global getCompOfRegion_px; + global getCompOfRegion_py; + + getCompOfRegion_px = region(:,2); + getCompOfRegion_py = region(:,1); + + if(true && param.debug) + for i = 1 : size(region, 1) + img(region(i,1), region(i,2), :) = [1,1,1]; + end + end + + maxRegionCompAreaValue = size(region, 1) * maxRegionCompArea; + + region_comp_flag = false(length(comp_infos), 1); + for i = 1 : length(comp_infos) + comp_box = comp_infos{i}.box; + in_pixel_num = length(find((getCompOfRegion_px >= comp_box(1)) & ... + (getCompOfRegion_px <= comp_box(1) + comp_box(3) - 1) & ... + (getCompOfRegion_py >= comp_box(2)) & ... 
+ (getCompOfRegion_py <= comp_box(2) + comp_box(4) - 1))); + comp_box_area = comp_box(3) * comp_box(4); + if(in_pixel_num > comp_box_area * minRegionCompCoveredArea && comp_box_area < maxRegionCompAreaValue) + region_comp_flag(i) = true; + end + end + region_comp = comp_infos(region_comp_flag); + region_comp_count = sum(region_comp_flag); + + box = zeros(region_comp_count, 4); + for i = 1 : region_comp_count + box(i,:) = region_comp{i}.box; + end + nms_idx = box_nms(box, box(:,3) .* box(:,4), 0.8); + + region_comp = region_comp(nms_idx); +end diff --git a/ProposalGeneration/getMser.py b/ProposalGeneration/getMser.py new file mode 100644 index 0000000..a3ad31e --- /dev/null +++ b/ProposalGeneration/getMser.py @@ -0,0 +1,58 @@ +#!/usr/bin/python +# -*- coding: UTF-8 -*- +from sys import argv +from matplotlib import pyplot as plt +import cv2 +import cv +import numpy as np +from compiler.ast import flatten + +def computeMSER(grayImg): + mser = cv2.MSER(1, 20, 200000, 1, 0.01, 200, 1.01, 0.003, 5) + regions = mser.detect(grayImg) + regionNum = len(regions) + boxes = np.zeros((regionNum, 4)) + minAreaBoxes = np.zeros((regionNum, 8)) + + for i in range(0, regionNum): + region = np.array(regions[i]) + minXY = np.min(region, 0) + maxXY = np.max(region, 0) + boxes[i, :] = [minXY[0] + 1, minXY[1] + 1, maxXY[0] - minXY[0] + 1, maxXY[1] - minXY[1]+1] + minAreaBox = np.int0(cv2.cv.BoxPoints(cv2.minAreaRect(regions[i]))) + minAreaBox = np.reshape(minAreaBox, (8)) + minAreaBoxes[i, :] = minAreaBox + + return boxes, minAreaBoxes + +if __name__ == '__main__': + imgPath = argv[1] + resPath = argv[2] + + img = cv2.imread(imgPath) + + # Compute MSER for the image + grayImg = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) + mserBoxes, minAreaBoxes = computeMSER(grayImg) + + # mserBoxes = np.append(mserBoxes1, mserBoxes2, 0) + + + # Construct the output strings + mserNum = mserBoxes.shape[0] + outputStrings = [] + for i in range(0, mserNum): + outputString = "" + #outputString = '%d %d %d %d\r\n' 
% (mserBoxes[i, 0], mserBoxes[i, 1], mserBoxes[i, 2], mserBoxes[i, 3]) + for j in range(0, 8): + outputString = outputString + '%d ' % (minAreaBoxes[i, j]) + + outputString = outputString + "\r\n" + outputStrings.append(outputString) + + fo = open(resPath, 'w') + fo.writelines(outputStrings) + + fo.close() + + diff --git a/ProposalGeneration/getProposal.m b/ProposalGeneration/getProposal.m new file mode 100644 index 0000000..45180f1 --- /dev/null +++ b/ProposalGeneration/getProposal.m @@ -0,0 +1,37 @@ +function proposal = getProposal(img, region, regionPerim, clusterCompInfos, estimated_orientation) + + %% collecting box and its points + boxesPoints = zeros(length(clusterCompInfos) * 4, 2); + boxes = zeros(length(clusterCompInfos), 4); + for nComp = 1 : length(clusterCompInfos) + boxes(nComp, :) = clusterCompInfos{nComp}.box; + if(size(clusterCompInfos{nComp}.box8d, 2) == 8) + boxesPoints((nComp - 1) * 4 + 1 : (nComp - 1) * 4 + 4, 1) = clusterCompInfos{nComp}.box8d(2 : 2 : 8); + boxesPoints((nComp - 1) * 4 + 1 : (nComp - 1) * 4 + 4, 2) = clusterCompInfos{nComp}.box8d(1 : 2 : 8); + else + boxesPoints((nComp - 1) * 4 + 1 : (nComp - 1) * 4 + 4, 1) = clusterCompInfos{nComp}.box8d(:, 1); + boxesPoints((nComp - 1) * 4 + 1 : (nComp - 1) * 4 + 4, 2) = clusterCompInfos{nComp}.box8d(:, 2); + end +% boxesPoints((nComp - 1) * 4 + 1, :) = [boxes(nComp, 2), boxes(nComp, 1)]; +% boxesPoints((nComp - 1) * 4 + 2, :) = [boxes(nComp, 2) + boxes(nComp, 4) - 1, boxes(nComp,1)]; +% boxesPoints((nComp - 1) * 4 + 3, :) = [boxes(nComp, 2) + boxes(nComp, 4) - 1, ... 
+% boxes(nComp,1) + boxes(nComp,3) - 1]; +% boxesPoints((nComp - 1) * 4 + 4, :) = [boxes(nComp, 2), boxes(nComp,1) + boxes(nComp, 3) - 1]; + end + + center_x = mean(boxes(:,1) + 0.5 * boxes(:,3)); + center_y = mean(boxes(:,2) + 0.5 * boxes(:,4)); + + orientationDiff = computeOrientationDiff(regionPerim, [center_y, center_x], estimated_orientation); + + boundary_pixels = regionPerim(orientationDiff < 3, :); + + %[rectX, rectY, area, ~, sideLenght] = computeMergedBox(boxes, boundary_pixels); + [rectX, rectY, area, ~, sideLength] = computeMergedBoxByOrientation(img, cat(1, boxesPoints, boundary_pixels), estimated_orientation); + [width, height] = findWidthAndHeight(rectX, rectY, sideLength, estimated_orientation); + + proposal = zeros(1, 8); + proposal(1 : 2 : 8) = rectX(1 : 4); + proposal(2 : 2 : 8) = rectY(1 : 4); +end + diff --git a/ProposalGeneration/getRotateBox8D.m b/ProposalGeneration/getRotateBox8D.m new file mode 100644 index 0000000..9b43104 --- /dev/null +++ b/ProposalGeneration/getRotateBox8D.m @@ -0,0 +1,18 @@ +function [ ret_box8d ] = getRotateBox8D( box8d, orientation, center_x, center_y) + boxes = box8d; + rotate_mat = [cosd(orientation), -sind(orientation); sind(orientation), cosd(orientation)]; + + %% move to center + boxes(:,1) = boxes(:,1) - center_y; + boxes(:,2) = boxes(:,2) - center_x; + boxes = boxes(:,[2 1])'; + ret_box8d = rotate_mat * boxes; + ret_box8d = ret_box8d([2,1],:)'; + + %% resotre position + ret_box8d(:,1) = ret_box8d(:,1) + center_y; + ret_box8d(:,2) = ret_box8d(:,2) + center_x; + + ret_box8d = round(ret_box8d); +end + diff --git a/ProposalGeneration/main.m b/ProposalGeneration/main.m new file mode 100644 index 0000000..12d5f8c --- /dev/null +++ b/ProposalGeneration/main.m @@ -0,0 +1,114 @@ +function main() + addpath('../include/vlfeat-0.9.20/toolbox/'); + vl_setup(); + addpath(genpath('../include/eccv14text')); + %data_infos + data_infos.img_path = '../data/msra_torch/im/'; + data_infos.map_path = 
'../data/msra_torch/multiscale/'; + data_infos.res_path = '../data/msra_torch/proposal_res/'; + + %global variable + global globalVar + + %gen_proposals param + global param + param.workPath = '/Users/zhangzheng/Documents/FCN_FULL/genProposal'; + param.reuse_mser = false; + param.debug = false; + param.minRegionProb = 0.2; + param.minRegionCompCoveredArea = 0.7; + param.maxRegionCompArea = 1; + param.secondaryMinRegionCompArea = 0.5; + param.orient_param.orientationInterval = 2; + param.orient_param.minOrientation = -90; + param.orient_param.maxOrientation = 90; + param.minCompHeightSimilarity = 0.7; + param.maxCompOrientationDiff = 3; + param.minIoUDiff = 0.85; + param.maxDistRatio = 2; + %minmaxRFs = [137, 68, 5]; + minmaxRFs = [137, 32, 5]; + + %% Only used for normal_mser3 + param.mser_info.delta = 1; + param.mser_info.minArea = 0.002; + param.mser_info.maxArea = 1; + param.mser_info.minDiversity = 0.8; + param.mser_info.maxVariation = 0.15; + + mkdir(data_infos.res_path); + + extension = '.jpg'; + imgData = dir([data_infos.img_path,'*.jpg']);% original image. + if(length(imgData) == 0) + imgData = dir([data_infos.img_path,'*.JPG']);% original image. + extension = '.JPG'; + end + nImg = length(imgData); + for ii= 1:nImg + disp(ii); + [~, name, ~] = fileparts(imgData(ii).name); +% if(~strcmp(name, 'img_11')) +% continue; +% end + + globalVar.imgName = name; + img_path = [data_infos.img_path, imgData(ii).name];%the original image. + + proposalsSavePath = [data_infos.res_path, name, '.txt']; + nMap = 3; + proposals = cell(nMap, 1); + for jj = 1 : nMap + map_path = [data_infos.map_path, name, '_', num2str(jj), extension];%the res image from last phase. 
+ img = imread(img_path); + map = imread(map_path); + map = double(map) / 255; + + [map_h,map_w,~]=size(map); + resizeRatio = size(img, 1) / map_h; + img = imresize(img, [map_h, map_w], 'bilinear'); + + proposals_tmp = gen_proposals(img, map, resizeRatio, minmaxRFs(jj)); + if(isempty(proposals_tmp) == false) + proposals_tmp(:, 1 : 8) = proposals_tmp(:, 1 : 8) * resizeRatio; + end + proposals{jj} = proposals_tmp; + + if(false) + imshow(img); + hold on; + for nProposal = 1 : size(proposals_tmp, 1) + x_arr = proposals_tmp(nProposal, 1 : 2 : 8); + y_arr = proposals_tmp(nProposal, 2 : 2 : 8); + plot([x_arr, x_arr(1)], [y_arr, y_arr(1)], 'color', rand(3,1)); + end + hold off; + saveas(gcf, [data_infos.res_path, name, '_', num2str(jj), '.jpg'], 'jpg'); + end + end + proposals = cell2mat(proposals); + + if(true) + img = imread(img_path); + imshow(img); + hold on; + for nProposal = 1 : size(proposals, 1) + x_arr = proposals(nProposal, 1 : 2 : 8); + y_arr = proposals(nProposal, 2 : 2 : 8); + plot([x_arr, x_arr(1)], [y_arr, y_arr(1)], 'color', rand(3,1)); + end + hold off; + saveas(gcf, [data_infos.res_path, name, '_all.jpg'], 'jpg'); + end + + %% to adapter old code + proposalsToSave = zeros(10, size(proposals, 1)); + if(size(proposalsToSave, 2) ~= 0) + proposalsToSave(1 : 8, :) = round(proposals(:, 1 : 8))'; + proposalsToSave(10, :) = proposals(:, 9); + end + fid = fopen(proposalsSavePath, 'w'); + fprintf(fid, '%d %d %d %d %d %d %d %d %d %f\n', proposalsToSave); + fclose(fid); + end +end \ No newline at end of file diff --git a/ProposalGeneration/minboundrect.m b/ProposalGeneration/minboundrect.m new file mode 100644 index 0000000..205dd77 --- /dev/null +++ b/ProposalGeneration/minboundrect.m @@ -0,0 +1,205 @@ +function [rectx,recty,area,perimeter] = minboundrect(x,y,metric) +% minboundrect: Compute the minimal bounding rectangle of points in the plane +% usage: [rectx,recty,area,perimeter] = minboundrect(x,y,metric) +% +% arguments: (input) +% x,y - vectors of points, 
describing points in the plane as +% (x,y) pairs. x and y must be the same lengths. +% +% metric - (OPTIONAL) - single letter character flag which +% denotes the use of minimal area or perimeter as the +% metric to be minimized. metric may be either 'a' or 'p', +% capitalization is ignored. Any other contraction of 'area' +% or 'perimeter' is also accepted. +% +% DEFAULT: 'a' ('area') +% +% arguments: (output) +% rectx,recty - 5x1 vectors of points that define the minimal +% bounding rectangle. +% +% area - (scalar) area of the minimal rect itself. +% +% perimeter - (scalar) perimeter of the minimal rect as found +% +% +% Note: For those individuals who would prefer the rect with minimum +% perimeter or area, careful testing convinces me that the minimum area +% rect was generally also the minimum perimeter rect on most problems +% (with one class of exceptions). This same testing appeared to verify my +% assumption that the minimum area rect must always contain at least +% one edge of the convex hull. The exception I refer to above is for +% problems when the convex hull is composed of only a few points, +% most likely exactly 3. Here one may see differences between the +% two metrics. My thanks to Roger Stafford for pointing out this +% class of counter-examples. +% +% Thanks are also due to Roger for pointing out a proof that the +% bounding rect must always contain an edge of the convex hull, in +% both the minimal perimeter and area cases. +% +% +% See also: minboundcircle, minboundtri, minboundsphere +% +% +% default for metric +if (nargin<3) || isempty(metric) + metric = 'a'; +elseif ~ischar(metric) + error 'metric must be a character flag if it is supplied.' +else + % check for 'a' or 'p' + metric = lower(metric(:)'); + ind = strmatch(metric,{'area','perimeter'}); + if isempty(ind) + error 'metric does not match either ''area'' or ''perimeter''' + end + + % just keep the first letter. 
+ metric = metric(1); +end + +% preprocess data +x=x(:); +y=y(:); + +% not many error checks to worry about +n = length(x); +if n~=length(y) + error 'x and y must be the same sizes' +end + + + +% if var(x)==0 + +% start out with the convex hull of the points to +% reduce the problem dramatically. Note that any +% points in the interior of the convex hull are +% never needed, so we drop them. +if n>3 + + %%%%%%%%%%%%%%%%%%%%%%%%% + if (var(x)== 0|| var(y)==0) + if var(x)== 0 + x = [x-1;x(1); x+1 ]; + y = [y ;y(1);y]; + flag = 1; + else + y = [y-1;y(1); y+1 ]; + x = [x ;x(1);x]; + flag = 1; + end + + else + flag = 0; + %%%%%%%%%%%%%%%%%%%%%% + edges = convhull(x,y); % 'Pp' will silence the warnings + + end + + % exclude those points inside the hull as not relevant + % also sorts the points into their convex hull as a + % closed polygon + + %%%%%%%%%%%%%%%%%%%% + if flag == 0 + %%%%%%%%%%%%%%%%%%%% + + x = x(edges); + y = y(edges); + %%%%%%%%%%%%%%%%%% + end + %%%%%%%%%%%%% + % probably fewer points now, unless the points are fully convex + nedges = length(x) - 1; +elseif n>1 + % n must be 2 or 3 + nedges = n; + x(end+1) = x(1); + y(end+1) = y(1); +else + % n must be 0 or 1 + nedges = n; +end + +% now we must find the bounding rectangle of those +% that remain. + +% special case small numbers of points. If we trip any +% of these cases, then we are done, so return. +switch nedges + case 0 + % empty begets empty + rectx = []; + recty = []; + area = []; + perimeter = []; + return + case 1 + % with one point, the rect is simple. + rectx = repmat(x,1,5); + recty = repmat(y,1,5); + area = 0; + perimeter = 0; + return + case 2 + % only two points. also simple. + rectx = x([1 2 2 1 1]); + recty = y([1 2 2 1 1]); + area = 0; + perimeter = 2*sqrt(diff(x).^2 + diff(y).^2); + return +end +% 3 or more points. 
+ +% will need a 2x2 rotation matrix through an angle theta +Rmat = @(theta) [cos(theta) sin(theta);-sin(theta) cos(theta)]; + +% get the angle of each edge of the hull polygon. +ind = 1:(length(x)-1); +edgeangles = atan2(y(ind+1) - y(ind),x(ind+1) - x(ind)); +% move the angle into the first quadrant. +edgeangles = unique(mod(edgeangles,pi/2)); + +% now just check each edge of the hull +nang = length(edgeangles); +area = inf; +perimeter = inf; +met = inf; +xy = [x,y]; +for i = 1:nang + % rotate the data through -theta + rot = Rmat(-edgeangles(i)); + xyr = xy*rot; + xymin = min(xyr,[],1); + xymax = max(xyr,[],1); + + % The area is simple, as is the perimeter + A_i = prod(xymax - xymin); + P_i = 2*sum(xymax-xymin); + + if metric=='a' + M_i = A_i; + else + M_i = P_i; + end + + % new metric value for the current interval. Is it better? + if M_i minmaxRF; + rule4 = bbox(:,3) .* bbox(:,4) > 20; + bbox = bbox(rule1 & rule2 & rule3 & rule4,:); + bbox8d = bbox8d(rule1 & rule2 & rule3 & rule4,:); + bbox_ind = box_nms(bbox, bbox(:,3) .* bbox(:, 4), 0.8); + bbox = bbox(bbox_ind, :); + bbox8d = bbox8d(bbox_ind, :); + + %old code + if false + %record + comp_infos = cell(size(bbox,1),1); + for kk=1:size(bbox,1) + box=bbox(kk,:); + comp_infos{kk}.box=box; + comp_infos{kk}.center=floor([box(1)+box(3)/2,box(2)+box(4)/2]); + end + end + + bbox = mat2cell(bbox, ones(size(bbox, 1), 1), 4); + bbox8d = mat2cell(bbox8d, ones(size(bbox8d, 1), 1), 8); + comp_infos = arrayfun(@localGetCompInfo, bbox, bbox8d); +end + +function out = localGetCompInfo(x, y) + out = cell(1); + x = x{1}; + y = y{1}; + out{1}.box = x; + out{1}.center=floor([x(1)+x(3)/2,x(2)+x(4)/2]); + out{1}.box8d = y; + + x_arr = [y(1 : 2 : 8), y(1)]; + y_arr = [y(2 : 2 : 8), y(2)]; + + edgeLens = ((x_arr(2 : 5) - x_arr(1 : 4)).^2 + (y_arr(2 : 5) - y_arr(1 : 4)).^2).^0.5; + + out{1}.shortEdge = min(edgeLens(1) + edgeLens(3), edgeLens(2) + edgeLens(4))/2; + out{1}.longEdge = max(edgeLens(1) + edgeLens(3), edgeLens(2) + 
edgeLens(4))/2; +end + +function [boxes8d, boxes4d] = localReadBoxes8d(resPath) + fid = fopen(resPath, 'r'); + boxes8d = fscanf(fid, '%d %d %d %d %d %d %d %d', [8, inf]); + fclose(fid); + boxes8d = boxes8d'; + boxes4d = zeros(size(boxes8d, 1), 4); + if(~isempty(boxes4d)) + boxes4d(:, 1) = min(boxes8d(:, 1 : 2 : 8), [], 2); + boxes4d(:, 2) = min(boxes8d(:, 2 : 2 : 8), [], 2); + boxes4d(:, 3) = max(boxes8d(:, 1 : 2 : 8), [], 2) - boxes4d(:, 1); + boxes4d(:, 4) = max(boxes8d(:, 2 : 2 : 8), [], 2) - boxes4d(:, 2); + else + boxes8d = zeros(0, 8); + end +end + + +function boxes = localReadBoxes(resPath) + fid = fopen(resPath, 'r'); + boxes = fscanf(fid, '%d %d %d %d', [4, inf]); + fclose(fid); + boxes = boxes'; +end \ No newline at end of file diff --git a/ProposalGeneration/normal_mser2.m b/ProposalGeneration/normal_mser2.m new file mode 100644 index 0000000..52aa0a6 --- /dev/null +++ b/ProposalGeneration/normal_mser2.m @@ -0,0 +1,102 @@ +function comp_infos = normal_mser2(src,info) +%this script use the normal version of mser extract regions +%default: one channel +img = imadjust(rgb2gray(src)); +%mser +[bbox,pixelsList,bright_dark_flag] = mser(img,info); +%record +comp_infos={}; +for kk=1:size(bbox,1) + box=bbox(kk,:); + comp_infos(kk).box=box; + comp_infos(kk).center=floor([box(1)+box(3)/2,box(2)+box(4)/2]); + comp_infos(kk).pixelList = pixelsList{kk,1}; + comp_infos(kk).bright_dark_flag = bright_dark_flag(kk); +end + +%%%%%%%% function %%%%%%%% +function [bbBox,pixelsList,bright_dark_flag] = mser(I,info) + % +[h,w]=size(I); +%phase-1: 'BrightOnDark' +[r,f] = vl_mser(I,'MinDiversity',info.minDiversity,'MaxVariation',info.maxVariation,... + 'MaxArea',info.maxArea,'MinArea',info.minArea,'BrightOnDark',1,'DarkOnBright',0,... + 'Delta',info.delta); +M =zeros(size(I)); +for x=r' + s = vl_erfill(I,x); + M(s) = M(s) + 1;%if s belong to one region, add 1. 
+end +mCount = max(max(M)); +bbBox = []; +pixelsList = {}; +bright_dark_flag=[]; +nCount = 1; +for ii=mCount:-1:1 + MM = M; + MM(find(MM=ii))=1; + mContours = bwlabel(MM,8); + mNum = max(max(mContours)); + for jj=1:mNum + [idx,idy,~] = find(mContours == jj); + ww = max(idx) - min(idx) + 1;%note me: ww & hh are reverse + hh = max(idy) - min(idy) + 1; + %condition + if (ww/(hh+eps)<0.3)||(hh/(ww+eps)<0.3) + continue; + end + if (ww<4||hh<4) || (ww>400|| hh>400) + continue; + end + if ww*hh< 20 + continue; + end + %update + bbBox(nCount,:)=[min(idx),min(idy),ww,hh]; + pixelsList{nCount,1}=[idx,idy]; + bright_dark_flag(nCount)=1; + nCount = nCount + 1; + end +end + +%phase-2: 'DarkOnBright' +[r,f] = vl_mser(I,'MinDiversity',info.minDiversity,'MaxVariation',info.maxVariation,... + 'MaxArea',info.maxArea,'MinArea',info.minArea,'BrightOnDark',0,'DarkOnBright',1,... + 'Delta',info.delta); +M =zeros(size(I)); +for x=r' + s = vl_erfill(I,x); + M(s) = M(s) + 1;%if s belong to one region, add 1. +end +mCount = max(max(M)); +for ii=mCount:-1:1 + MM = M; + MM(find(MM=ii))=1; + mContours = bwlabel(MM,8); + mNum = max(max(mContours)); + for jj=1:mNum + [idx,idy,~] = find(mContours == jj); + ww = max(idx) - min(idx) + 1;%note me: ww & hh are reverse + hh = max(idy) - min(idy) + 1; + %condition + if (ww/(hh+eps)<0.3)||(hh/(ww+eps)<0.3) + continue; + end + if (ww<4||hh<4) || (ww>400|| hh>400) + continue; + end + if ww*hh< 20 + continue; + end + %update + bbBox(nCount,:)=[min(idx),min(idy),ww,hh]; + pixelsList{nCount,1}=[idx,idy]; + bright_dark_flag(nCount)=0; + nCount = nCount + 1; + end +end +end + +end \ No newline at end of file diff --git a/ProposalGeneration/normal_mser3.m b/ProposalGeneration/normal_mser3.m new file mode 100755 index 0000000..974a9f4 --- /dev/null +++ b/ProposalGeneration/normal_mser3.m @@ -0,0 +1,115 @@ +function comp_infos = normal_mser3(src,info, resizeRatio, minmaxRF) +%this script use the normal version of mser extract regions +%default: one channel +img = 
imadjust(rgb2gray(src)); +%mser +[bbox,pixelsList,bright_dark_flag] = mser(img,info); + +% filter bbox +if(~isempty(bbox)) + rule1 = (bbox(:,3) ./ bbox(:,4)) < 2.5; + rule2 = (bbox(:,4) ./ bbox(:,3)) < 5; + rule3 = (max(bbox(:,3), bbox(:,4)) * resizeRatio) > minmaxRF; + rule4 = bbox(:,3) .* bbox(:,4) > 20; + rule_Ind = rule1 & rule2 & rule3 & rule4; + bbox = bbox(rule_Ind,:); + pixelsList = pixelsList(rule_Ind); +end + +%record +comp_infos=cell(size(bbox,1), 1); +for kk=1:size(bbox,1) + box=bbox(kk,[2 1 4 3]); + comp_infos{kk}.box=box; + comp_infos{kk}.center=floor([box(1)+box(3)/2,box(2)+box(4)/2]); + comp_infos{kk}.pixelList = pixelsList{kk,1}; + comp_infos{kk}.bright_dark_flag = bright_dark_flag(kk); + comp_infos{kk}.box8d = ConvertBox4dToBox8d(box); +end + +%%%%%%%% function %%%%%%%% +function [bbBox,pixelsList,bright_dark_flag] = mser(I,info) + % +[h,w]=size(I); +%phase-1: 'BrightOnDark' +[r,f] = vl_mser(I,'MinDiversity',info.minDiversity,'MaxVariation',info.maxVariation,... + 'MaxArea',info.maxArea,'MinArea',info.minArea,'BrightOnDark',1,'DarkOnBright',0,... + 'Delta',info.delta); +M =zeros(size(I)); +for x=r' + s = vl_erfill(I,x); + M(s) = M(s) + 1;%if s belong to one region, add 1. +end +mCount = max(max(M)); +bbBox = []; +pixelsList = {}; +bright_dark_flag=[]; +nCount = 1; +for ii=mCount:-1:1 + MM = M; + MM(find(MM=ii))=1; + mContours = bwlabel(MM,8); + mNum = max(max(mContours)); + for jj=1:mNum + [idx,idy,~] = find(mContours == jj); + ww = max(idx) - min(idx) + 1;%note me: ww & hh are reverse + hh = max(idy) - min(idy) + 1; + %condition + if (ww/(hh+eps)<0.1)||(hh/(ww+eps)<0.1) + continue; + end + if (ww<4||hh<4) || (ww>400|| hh>400) + continue; + end + if ww*hh< 20 + continue; + end + %update + bbBox(nCount,:)=[min(idx),min(idy),ww,hh]; + pixelsList{nCount,1}=[idx,idy]; + bright_dark_flag(nCount)=1; + nCount = nCount + 1; + end +end + +%phase-2: 'DarkOnBright' +[r,f] = vl_mser(I,'MinDiversity',info.minDiversity,'MaxVariation',info.maxVariation,... 
+ 'MaxArea',info.maxArea,'MinArea',info.minArea,'BrightOnDark',0,'DarkOnBright',1,... + 'Delta',info.delta); +M =zeros(size(I)); +for x=r' + s = vl_erfill(I,x); + M(s) = M(s) + 1;%if s belong to one region, add 1. +end +mCount = max(max(M)); +for ii=mCount:-1:1 + MM = M; + MM(find(MM=ii))=1; + mContours = bwlabel(MM,8); + mNum = max(max(mContours)); + for jj=1:mNum + [idx,idy,~] = find(mContours == jj); + ww = max(idx) - min(idx) + 1;%note me: ww & hh are reverse + hh = max(idy) - min(idy) + 1; + %condition + if (ww/(hh+eps)<0.1)||(hh/(ww+eps)<0.1) + continue; + end + if (ww<4||hh<4) || (ww>400|| hh>400) + continue; + end + if ww*hh< 20 + continue; + end + %update + bbBox(nCount,:)=[min(idx),min(idy),ww,hh]; + pixelsList{nCount,1}=[idx,idy]; + bright_dark_flag(nCount)=0; + nCount = nCount + 1; + end +end +end + +end \ No newline at end of file diff --git a/ProposalGeneration/show_bbox.m b/ProposalGeneration/show_bbox.m new file mode 100644 index 0000000..6abd6ab --- /dev/null +++ b/ProposalGeneration/show_bbox.m @@ -0,0 +1,15 @@ +function show_bbox(im, bbox, colors) + if(nargin == 2) + imshow(im); + for n = 1 : size(bbox,1) + rectangle('position', bbox(n,:), 'edgecolor', rand(3,1)); + end + end + + if(nargin == 3) + imshow(im); + for n = 1 : size(bbox,1) + rectangle('position', bbox(n,:), 'edgecolor', colors(n,:)); + end + end +end diff --git a/README.md b/README.md index 73122a8..c09c24f 100644 --- a/README.md +++ b/README.md @@ -1 +1,22 @@ -# FCN_Text \ No newline at end of file +# FCN_Text +# Code written by Zheng Zhang(macaroniz1990@gmail.com) + +########################################################################################## +1. Introduction. + +This project includes the source code and trained model about the text region fcn and proposal generation. We also provide the probability text region maps for ICDAR2015, ICDAR2013 and MSRA-TD500. If you use the resources of this project, please considering cite the paper: + +Zhang, Zheng, et al. 
"Multi-oriented text detection with fully convolutional networks." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016. + +########################################################################################## + +2.Installation + +Dependencies: + +Proposal Generation: Matlab, Python, OpenCV(before 3.0 version), VL_feat, Pitor dollar toolbox. +For caffe version of TextRegionFCN, please install HED(https://github.com/s9xie/hed) at first. +For torch version of TextRegionFCN, please install torch at first. + + + diff --git a/TextRegionFCN_Caffe/CAFFE/deploy.prototxt b/TextRegionFCN_Caffe/CAFFE/deploy.prototxt new file mode 100644 index 0000000..21a4866 --- /dev/null +++ b/TextRegionFCN_Caffe/CAFFE/deploy.prototxt @@ -0,0 +1,129 @@ +name: "FCN" + +input: "data" +input_dim: 1 +input_dim: 3 +input_dim: 500 +input_dim: 500 + +layer { bottom: 'data' top: 'conv1_1' name: 'conv1_1' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 64 pad: 35 kernel_size: 3 } } +layer { bottom: 'conv1_1' top: 'conv1_1' name: 'relu1_1' type: "ReLU" } +layer { bottom: 'conv1_1' top: 'conv1_2' name: 'conv1_2' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 64 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv1_2' top: 'conv1_2' name: 'relu1_2' type: "ReLU" } +layer { name: 'pool1' bottom: 'conv1_2' top: 'pool1' type: "Pooling" + pooling_param { pool: MAX kernel_size: 2 stride: 2 } } + +layer { name: 'conv2_1' bottom: 'pool1' top: 'conv2_1' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 128 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv2_1' top: 'conv2_1' name: 'relu2_1' type: "ReLU" } +layer { bottom: 'conv2_1' top: 'conv2_2' name: 'conv2_2' type: "Convolution" + param { lr_mult: 1 decay_mult: 
1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 128 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv2_2' top: 'conv2_2' name: 'relu2_2' type: "ReLU" } +layer { bottom: 'conv2_2' top: 'pool2' name: 'pool2' type: "Pooling" + pooling_param { pool: MAX kernel_size: 2 stride: 2 } } + +layer { bottom: 'pool2' top: 'conv3_1' name: 'conv3_1' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 256 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv3_1' top: 'conv3_1' name: 'relu3_1' type: "ReLU" } +layer { bottom: 'conv3_1' top: 'conv3_2' name: 'conv3_2' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 256 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv3_2' top: 'conv3_2' name: 'relu3_2' type: "ReLU" } +layer { bottom: 'conv3_2' top: 'conv3_3' name: 'conv3_3' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 256 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv3_3' top: 'conv3_3' name: 'relu3_3' type: "ReLU" } +layer { bottom: 'conv3_3' top: 'pool3' name: 'pool3' type: "Pooling" + pooling_param { pool: MAX kernel_size: 2 stride: 2 } } + +layer { bottom: 'pool3' top: 'conv4_1' name: 'conv4_1' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv4_1' top: 'conv4_1' name: 'relu4_1' type: "ReLU" } +layer { bottom: 'conv4_1' top: 'conv4_2' name: 'conv4_2' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv4_2' top: 'conv4_2' name: 'relu4_2' type: "ReLU" } +layer { bottom: 'conv4_2' top: 'conv4_3' name: 
'conv4_3' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv4_3' top: 'conv4_3' name: 'relu4_3' type: "ReLU" } +layer { bottom: 'conv4_3' top: 'pool4' name: 'pool4' type: "Pooling" + pooling_param { pool: MAX kernel_size: 2 stride: 2 } } + +layer { bottom: 'pool4' top: 'conv5_1' name: 'conv5_1' type: "Convolution" + param { lr_mult: 100 decay_mult: 1 } param { lr_mult: 200 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv5_1' top: 'conv5_1' name: 'relu5_1' type: "ReLU" } +layer { bottom: 'conv5_1' top: 'conv5_2' name: 'conv5_2' type: "Convolution" + param { lr_mult: 100 decay_mult: 1 } param { lr_mult: 200 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv5_2' top: 'conv5_2' name: 'relu5_2' type: "ReLU" } +layer { bottom: 'conv5_2' top: 'conv5_3' name: 'conv5_3' type: "Convolution" + param { lr_mult: 100 decay_mult: 1 } param { lr_mult: 200 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv5_3' top: 'conv5_3' name: 'relu5_3' type: "ReLU" } + +## DSN conv 1 ### +layer { name: 'score-dsn1' type: "Convolution" bottom: 'conv1_2' top: 'score-dsn1-up' + param { lr_mult: 0.01 decay_mult: 1 } param { lr_mult: 0.02 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 1 kernel_size: 1 } } +layer { type: "Crop" name: 'crop' bottom: 'score-dsn1-up' bottom: 'data' top: 'upscore-dsn1' } +layer { type: "Sigmoid" name: "sigmoid-dsn1" bottom: "upscore-dsn1" top:"sigmoid-dsn1"} + +### DSN conv 2 ### +layer { name: 'score-dsn2' type: "Convolution" bottom: 'conv2_2' top: 'score-dsn2' + param { lr_mult: 0.01 decay_mult: 1 } param { lr_mult: 0.02 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 1 kernel_size: 1 } } +layer { type: 
"Deconvolution" name: 'upsample_2' bottom: 'score-dsn2' top: 'score-dsn2-up' + param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0} + convolution_param { kernel_size: 4 stride: 2 num_output: 1 } } +layer { type: "Crop" name: 'crop' bottom: 'score-dsn2-up' bottom: 'data' top: 'upscore-dsn2' } +layer { type: "Sigmoid" name: "sigmoid-dsn2" bottom: "upscore-dsn2" top:"sigmoid-dsn2"} + +### DSN conv 3 ### +layer { name: 'score-dsn3' type: "Convolution" bottom: 'conv3_3' top: 'score-dsn3' + param { lr_mult: 0.01 decay_mult: 1 } param { lr_mult: 0.02 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 1 kernel_size: 1 } } +layer { type: "Deconvolution" name: 'upsample_4' bottom: 'score-dsn3' top: 'score-dsn3-up' + param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0} + convolution_param { kernel_size: 8 stride: 4 num_output: 1 } } +layer { type: "Crop" name: 'crop' bottom: 'score-dsn3-up' bottom: 'data' top: 'upscore-dsn3' } +layer { type: "Sigmoid" name: "sigmoid-dsn3" bottom: "upscore-dsn3" top:"sigmoid-dsn3"} + +###DSN conv 4### +layer { name: 'score-dsn4' type: "Convolution" bottom: 'conv4_3' top: 'score-dsn4' + param { lr_mult: 0.01 decay_mult: 1 } param { lr_mult: 0.02 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 1 kernel_size: 1 } } +layer { type: "Deconvolution" name: 'upsample_8' bottom: 'score-dsn4' top: 'score-dsn4-up' + param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0} + convolution_param { kernel_size: 16 stride: 8 num_output: 1 } } +layer { type: "Crop" name: 'crop' bottom: 'score-dsn4-up' bottom: 'data' top: 'upscore-dsn4' } +layer { type: "Sigmoid" name: "sigmoid-dsn4" bottom: "upscore-dsn4" top:"sigmoid-dsn4"} + +###DSN conv 5### +layer { name: 'score-dsn5' type: "Convolution" bottom: 'conv5_3' top: 'score-dsn5' + param { lr_mult: 0.01 decay_mult: 1 } param { lr_mult: 0.02 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 1 kernel_size: 1 } } +layer { type: 
"Deconvolution" name: 'upsample_16' bottom: 'score-dsn5' top: 'score-dsn5-up' + param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0} + convolution_param { kernel_size: 32 stride: 16 num_output: 1 } } +layer { type: "Crop" name: 'crop' bottom: 'score-dsn5-up' bottom: 'data' top: 'upscore-dsn5' } +layer { type: "Sigmoid" name: "sigmoid-dsn5" bottom: "upscore-dsn5" top:"sigmoid-dsn5"} + +### Concat and multiscale weight layer ### +layer { name: "concat" bottom: "upscore-dsn1" bottom: "upscore-dsn2" bottom: "upscore-dsn3" + bottom: "upscore-dsn4" bottom: "upscore-dsn5" top: "concat-upscore" type: "Concat" + concat_param { concat_dim: 1} } +layer { name: 'new-score-weighting' type: "Convolution" bottom: 'concat-upscore' top: 'upscore-fuse' + param { lr_mult: 0.01 decay_mult: 1 } param { lr_mult: 0.02 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 1 kernel_size: 1 weight_filler {type: "constant" value: 0.2} } } +layer { type: "Sigmoid" name: "sigmoid-fuse" bottom: "upscore-fuse" top:"sigmoid-fuse"} + diff --git a/TextRegionFCN_Caffe/CAFFE/hed_iter_20000.caffemodel b/TextRegionFCN_Caffe/CAFFE/hed_iter_20000.caffemodel new file mode 100644 index 0000000..7c202b2 Binary files /dev/null and b/TextRegionFCN_Caffe/CAFFE/hed_iter_20000.caffemodel differ diff --git a/TextRegionFCN_Caffe/CAFFE/hed_iter_20000.solverstate b/TextRegionFCN_Caffe/CAFFE/hed_iter_20000.solverstate new file mode 100644 index 0000000..14dbdb7 Binary files /dev/null and b/TextRegionFCN_Caffe/CAFFE/hed_iter_20000.solverstate differ diff --git a/TextRegionFCN_Caffe/CAFFE/solve.py b/TextRegionFCN_Caffe/CAFFE/solve.py new file mode 100644 index 0000000..499aaab --- /dev/null +++ b/TextRegionFCN_Caffe/CAFFE/solve.py @@ -0,0 +1,58 @@ +from __future__ import division +import numpy as np +import sys +caffe_root = '../../' +sys.path.insert(0, caffe_root + 'python') +import caffe + +# make a bilinear interpolation kernel +# credit @longjon +def upsample_filt(size): + factor = (size + 
1) // 2 + if size % 2 == 1: + center = factor - 1 + else: + center = factor - 0.5 + og = np.ogrid[:size, :size] + return (1 - abs(og[0] - center) / factor) * \ + (1 - abs(og[1] - center) / factor) + +# set parameters s.t. deconvolutional layers compute bilinear interpolation +# N.B. this is for deconvolution without groups +def interp_surgery(net, layers): + for l in layers: + m, k, h, w = net.params[l][0].data.shape + if m != k: + print 'input + output channels need to be the same' + raise + if h != w: + print 'filters need to be square' + raise + filt = upsample_filt(h) + net.params[l][0].data[range(m), range(k), :, :] = filt + +# base net -- follow the editing model parameters example to make +# a fully convolutional VGG16 net. +# http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/net_surgery.ipynb +base_weights = 'hed_iter_24000.caffemodel' + +# init +caffe.set_mode_gpu() +caffe.set_device(2) + +solver = caffe.SGDSolver('solver.prototxt') + +# do net surgery to set the deconvolution weights for bilinear interpolation +interp_layers = [k for k in solver.net.params.keys() if 'up' in k] +interp_surgery(solver.net, interp_layers) + +# copy base weights for fine-tuning +#solver.restore('dsn-full-res-3-scales_iter_29000.solverstate') +solver.net.copy_from(base_weights) + +# solve straight through -- a better approach is to define a solving loop to +# 1. take SGD steps +# 2. score the model by the test net `solver.test_nets[0]` +# 3. 
repeat until satisfied +solver.step(100000) + diff --git a/TextRegionFCN_Caffe/CAFFE/solver.prototxt b/TextRegionFCN_Caffe/CAFFE/solver.prototxt new file mode 100644 index 0000000..82ce1f3 --- /dev/null +++ b/TextRegionFCN_Caffe/CAFFE/solver.prototxt @@ -0,0 +1,19 @@ +net: "train_val.prototxt" +test_iter: 0 +test_interval: 1000 +# lr for fine-tuning should be lower than when starting from scratch +#debug_info: true +base_lr: 0.00000001 +lr_policy: "step" +gamma: 0.1 +iter_size: 10 +# stepsize should also be lower, as we're closer to being done +stepsize: 10000 +display: 20 +max_iter: 30001 +momentum: 0.9 +weight_decay: 0.0002 +snapshot: 1000 +snapshot_prefix: "hed" +# uncomment the following to default to CPU mode solving +# solver_mode: CPU diff --git a/TextRegionFCN_Caffe/CAFFE/train_val.prototxt b/TextRegionFCN_Caffe/CAFFE/train_val.prototxt new file mode 100644 index 0000000..c95323b --- /dev/null +++ b/TextRegionFCN_Caffe/CAFFE/train_val.prototxt @@ -0,0 +1,165 @@ +name: "HED" +layer { + name: "data" + type: "ImageLabelmapData" + top: "data" + top: "label" + include { + phase: TRAIN + } + transform_param { + mirror: false + mean_value: 104.00699 + mean_value: 116.66877 + mean_value: 122.67892 + } + image_data_param { + root_folder: "../../data/" + source: "../../data/train_pair.list" + batch_size: 1 + shuffle: true + new_height: 0 + new_width: 0 + } +} +layer { + name: "data" + type: "ImageLabelmapData" + top: "data" + top: "label" + include { + phase: TEST + } + transform_param { + mirror: false + mean_value: 104.00699 + mean_value: 116.66877 + mean_value: 122.67892 + } + image_data_param { + root_folder: "../../data/" + source: "../../data/val_pair.list" + #Just setup the network. 
No real online testing + batch_size: 1 + shuffle: true + new_height: 0 + new_width: 0 + } +} + +layer { bottom: 'data' top: 'conv1_1' name: 'conv1_1' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 64 pad: 35 kernel_size: 3 } } +layer { bottom: 'conv1_1' top: 'conv1_1' name: 'relu1_1' type: "ReLU" } +layer { bottom: 'conv1_1' top: 'conv1_2' name: 'conv1_2' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 64 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv1_2' top: 'conv1_2' name: 'relu1_2' type: "ReLU" } +layer { name: 'pool1' bottom: 'conv1_2' top: 'pool1' type: "Pooling" + pooling_param { pool: MAX kernel_size: 2 stride: 2 } } + +layer { name: 'conv2_1' bottom: 'pool1' top: 'conv2_1' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 128 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv2_1' top: 'conv2_1' name: 'relu2_1' type: "ReLU" } +layer { bottom: 'conv2_1' top: 'conv2_2' name: 'conv2_2' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 128 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv2_2' top: 'conv2_2' name: 'relu2_2' type: "ReLU" } +layer { bottom: 'conv2_2' top: 'pool2' name: 'pool2' type: "Pooling" + pooling_param { pool: MAX kernel_size: 2 stride: 2 } } + +layer { bottom: 'pool2' top: 'conv3_1' name: 'conv3_1' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 256 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv3_1' top: 'conv3_1' name: 'relu3_1' type: "ReLU" } +layer { bottom: 'conv3_1' top: 'conv3_2' name: 'conv3_2' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + 
convolution_param { engine: CAFFE num_output: 256 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv3_2' top: 'conv3_2' name: 'relu3_2' type: "ReLU" } +layer { bottom: 'conv3_2' top: 'conv3_3' name: 'conv3_3' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 256 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv3_3' top: 'conv3_3' name: 'relu3_3' type: "ReLU" } +layer { bottom: 'conv3_3' top: 'pool3' name: 'pool3' type: "Pooling" + pooling_param { pool: MAX kernel_size: 2 stride: 2 } } + +layer { bottom: 'pool3' top: 'conv4_1' name: 'conv4_1' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv4_1' top: 'conv4_1' name: 'relu4_1' type: "ReLU" } +layer { bottom: 'conv4_1' top: 'conv4_2' name: 'conv4_2' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv4_2' top: 'conv4_2' name: 'relu4_2' type: "ReLU" } +layer { bottom: 'conv4_2' top: 'conv4_3' name: 'conv4_3' type: "Convolution" + param { lr_mult: 1 decay_mult: 1 } param { lr_mult: 2 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv4_3' top: 'conv4_3' name: 'relu4_3' type: "ReLU" } +layer { bottom: 'conv4_3' top: 'pool4' name: 'pool4' type: "Pooling" + pooling_param { pool: MAX kernel_size: 2 stride: 2 } } + +layer { bottom: 'pool4' top: 'conv5_1' name: 'conv5_1' type: "Convolution" + param { lr_mult: 100 decay_mult: 1 } param { lr_mult: 200 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv5_1' top: 'conv5_1' name: 'relu5_1' type: "ReLU" } +layer { bottom: 'conv5_1' top: 'conv5_2' name: 'conv5_2' type: "Convolution" + param { 
lr_mult: 100 decay_mult: 1 } param { lr_mult: 200 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv5_2' top: 'conv5_2' name: 'relu5_2' type: "ReLU" } +layer { bottom: 'conv5_2' top: 'conv5_3' name: 'conv5_3' type: "Convolution" + param { lr_mult: 100 decay_mult: 1 } param { lr_mult: 200 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 512 pad: 1 kernel_size: 3 } } +layer { bottom: 'conv5_3' top: 'conv5_3' name: 'relu5_3' type: "ReLU" } + +## DSN conv 1 ### +layer { name: 'score-dsn1' type: "Convolution" bottom: 'conv1_2' top: 'score-dsn1-up' + param { lr_mult: 0.01 decay_mult: 1 } param { lr_mult: 0.02 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 1 kernel_size: 1 } } +layer { type: "Crop" name: 'crop' bottom: 'score-dsn1-up' bottom: 'data' top: 'upscore-dsn1' } + +### DSN conv 2 ### +layer { name: 'score-dsn2' type: "Convolution" bottom: 'conv2_2' top: 'score-dsn2' + param { lr_mult: 0.01 decay_mult: 1 } param { lr_mult: 0.02 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 1 kernel_size: 1 } } +layer { type: "Deconvolution" name: 'upsample_2' bottom: 'score-dsn2' top: 'score-dsn2-up' + param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0} + convolution_param { kernel_size: 4 stride: 2 num_output: 1 } } +layer { type: "Crop" name: 'crop' bottom: 'score-dsn2-up' bottom: 'data' top: 'upscore-dsn2' } + +### DSN conv 3 ### +layer { name: 'score-dsn3' type: "Convolution" bottom: 'conv3_3' top: 'score-dsn3' + param { lr_mult: 0.01 decay_mult: 1 } param { lr_mult: 0.02 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 1 kernel_size: 1 } } +layer { type: "Deconvolution" name: 'upsample_4' bottom: 'score-dsn3' top: 'score-dsn3-up' + param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0} + convolution_param { kernel_size: 8 stride: 4 num_output: 1 } } +layer { type: "Crop" name: 'crop' bottom: 'score-dsn3-up' bottom: 'data' 
top: 'upscore-dsn3' } + +###DSN conv 4### +layer { name: 'score-dsn4' type: "Convolution" bottom: 'conv4_3' top: 'score-dsn4' + param { lr_mult: 0.01 decay_mult: 1 } param { lr_mult: 0.02 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 1 kernel_size: 1 } } +layer { type: "Deconvolution" name: 'upsample_8' bottom: 'score-dsn4' top: 'score-dsn4-up' + param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0} + convolution_param { kernel_size: 16 stride: 8 num_output: 1 } } +layer { type: "Crop" name: 'crop' bottom: 'score-dsn4-up' bottom: 'data' top: 'upscore-dsn4' } + +###DSN conv 5### +layer { name: 'score-dsn5' type: "Convolution" bottom: 'conv5_3' top: 'score-dsn5' + param { lr_mult: 0.01 decay_mult: 1 } param { lr_mult: 0.02 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 1 kernel_size: 1 } } +layer { type: "Deconvolution" name: 'upsample_16' bottom: 'score-dsn5' top: 'score-dsn5-up' + param { lr_mult: 0 decay_mult: 1 } param { lr_mult: 0 decay_mult: 0} + convolution_param { kernel_size: 32 stride: 16 num_output: 1 } } +layer { type: "Crop" name: 'crop' bottom: 'score-dsn5-up' bottom: 'data' top: 'upscore-dsn5' } + +### Concat and multiscale weight layer ### +layer { name: "concat" bottom: "upscore-dsn1" bottom: "upscore-dsn2" bottom: "upscore-dsn3" + bottom: "upscore-dsn4" bottom: "upscore-dsn5" top: "concat-upscore" type: "Concat" + concat_param { concat_dim: 1} } +layer { name: 'new-score-weighting' type: "Convolution" bottom: 'concat-upscore' top: 'upscore-fuse' + param { lr_mult: 0.001 decay_mult: 1 } param { lr_mult: 0.002 decay_mult: 0} + convolution_param { engine: CAFFE num_output: 1 kernel_size: 1 weight_filler {type: "constant" value: 0.2} } } +layer { type: "SigmoidCrossEntropyLoss" bottom: "upscore-fuse" bottom: "label" top:"fuse_loss" loss_weight: 1} + diff --git a/TextRegionFCN_Torch/.DS_Store b/TextRegionFCN_Torch/.DS_Store new file mode 100644 index 0000000..2db6259 Binary files /dev/null and 
b/TextRegionFCN_Torch/.DS_Store differ diff --git a/TextRegionFCN_Torch/Crop.lua b/TextRegionFCN_Torch/Crop.lua new file mode 100644 index 0000000..95abc44 --- /dev/null +++ b/TextRegionFCN_Torch/Crop.lua @@ -0,0 +1,76 @@ +local Crop, Parent = torch.class('nn.Crop', 'nn.Module') + +function Crop:__init() + Parent.__init(self) + self.mask = torch.Tensor() +end + +function Crop:updateOutput(_input) +-- input_data: bottom data +-- input: current laye's input, format:nBatch x nChanel x H x W or nChannel x H x W, just one scale for per training time + local input_data = _input[1]; + local input = _input[2]; + --self.output:resizeAs(input_data) --:copy(input_data) + + if input_data:dim() == 3 then -- one image + self.output:resize(input:size(1),input_data:size(2),input_data:size(3)) --:copy(input_data) + self.mask = torch.Tensor(1,4) -- pad_l, pad_r,pad_t,pad_b + self.mask[{1,1}]=math.floor((input:size(2) - input_data:size(2))/2) + self.mask[{1,2}]=(input:size(2)-input_data:size(2)) - self.mask[{1,1}] + self.mask[{1,3}]=math.floor((input:size(3) - input_data:size(3))/2) + self.mask[{1,4}]=(input:size(3) - input_data:size(3)) - self.mask[{1,3}] + -- update: crop input + self.output:copy(input[{{},{self.mask[{1,1}]+1,self.mask[{1,1}]+input_data:size(2)},{self.mask[{1,3}]+1,self.mask[{1,3}]+input_data:size(3)}}]); + + elseif input_data:dim() == 4 then -- batch + self.output:resize(input:size(1),input:size(2),input_data:size(3),input_data:size(4)) --:copy(input_data) + --self.mask = torch.Tensor(input_data:size(1),4) + self.mask = torch.Tensor(1,4) -- pad_l, pad_r,pad_t,pad_b + self.mask[{1,1}]=math.floor((input:size(3) - input_data:size(3))/2) + self.mask[{1,2}]=(input:size(3)-input_data:size(3)) - self.mask[{1,1}] + self.mask[{1,3}]=math.floor((input:size(4) - input_data:size(4))/2) + self.mask[{1,4}]=(input:size(4) - input_data:size(4)) - self.mask[{1,3}] + -- update: crop input + 
self.output:copy(input[{{},{},{self.mask[{1,1}]+1,self.mask[{1,1}]+input_data:size(3)},{self.mask[{1,3}]+1,self.mask[{1,3}]+input_data:size(4)}}]) + --self.output:copy(input[{{},{},{self.mask[{1,1}],self.mask[{1,2}]},{self.mask[{1,3}],self.mask[{1,4}]}}]) + else + error(' illegal input, must be 3 D or 4 D') + end + -- update crop input + return self.output +end + + +function Crop:updateGradInput(_input,gradOutput) + --self.gradInput = torch.Tensor() + if gradOutput:dim() == 3 then + self.gradInput:resize(gradOutput:size(1),gradOutput:size(2)+self.mask[{1,1}]+self.mask[{1,2}],gradOutput:size(3)+self.mask[{1,3}]+self.mask[{1,4}]) + self.gradInput:fill(0) + self.gradInput[{{},{self.mask[{1,1}]+1,self.mask[{1,1}]+gradOutput:size(2)},{self.mask[{1,3}]+1,self.mask[{1,3}]+gradOutput:size(3)}}]:copy(gradOutput) + elseif gradOutput:dim() == 4 then + self.gradInput:resize(gradOutput:size(1),gradOutput:size(2),gradOutput:size(3)+self.mask[{1,1}]+self.mask[{1,2}],gradOutput:size(4)+self.mask[{1,3}]+self.mask[{1,4}]) + self.gradInput:fill(0) + self.gradInput[{{},{},{self.mask[{1,1}]+1,self.mask[{1,1}]+gradOutput:size(3)},{self.mask[{1,3}]+1,self.mask[{1,3}]+gradOutput:size(4)}}]:copy(gradOutput) + else + error(' illegal gradOutput, must be 3 D or 4 D') + end + return {nil,self.gradInput} +end + +function Crop:accGradParameters(_input, gradOutput, scale) +end + + +function Crop:forward(_input) +-- rewrite forward + return self:updateOutput(_input) +end + +function Crop:backward(_input, gradOutput, scale) + scale = scale or 1 + self:updateGradInput(_input, gradOutput) + self:accGradParameters(_input, gradOutput, scale) + return self.gradInput +end + + diff --git a/TextRegionFCN_Torch/TableCopy.lua b/TextRegionFCN_Torch/TableCopy.lua new file mode 100644 index 0000000..d04f54a --- /dev/null +++ b/TextRegionFCN_Torch/TableCopy.lua @@ -0,0 +1,48 @@ +local TableCopy, parent = torch.class('nn.TableCopy', 'nn.Module') + +function 
TableCopy:__init(intype,outtype,nTable,forceCopy,dontCast) + intype = intype or torch.Tensor.__typename + outtype = outtype or torch.Tensor.__typename + self.nt = nTable + self.dontCast = dontCast + parent.__init(self) + self.gradInput = {} + self.output = {} + for ii=1,self.nt do + self.gradInput[ii]=torch.getmetatable(intype).new() + self.output[ii]=torch.getmetatable(outtype).new() + end + if (not forceCopy) and intype == outtype then + self.updateOutput = function(self, input) + self.output = input + return input + end + + self.updateGradInput = function(self, input, gradOutput) + self.gradInput = gradOutput + return gradOutput + end + end + +end + +function TableCopy:updateOutput(input) + for ii=1,self.nt do + self.output[ii]:resize(input[ii]:size()):copy(input[ii]) + end + return self.output +end + +function TableCopy:updateGradInput(input, gradOutput) + for ii=1,self.nt do + self.gradInput[ii]:resize(gradOutput[ii]:size()):copy(gradOutput[ii]) + end + return self.gradInput +end + +function TableCopy:type(type) + if type and self.dontCast then + return self + end + return parent.type(self, type) +end \ No newline at end of file diff --git a/TextRegionFCN_Torch/TableSelect.lua b/TextRegionFCN_Torch/TableSelect.lua new file mode 100644 index 0000000..2fff1bd --- /dev/null +++ b/TextRegionFCN_Torch/TableSelect.lua @@ -0,0 +1,47 @@ +local TableSelect, parent = torch.class('nn.TableSelect', 'nn.Module') + +function TableSelect:__init(index)--{1,2} + parent.__init(self) + self.index = index --also table + self.gradInput = {} + self.output = {} +end + +function TableSelect:updateOutput(input) + assert(type(input)=='table','input must be table') + for ii=1,table.getn(self.index) do + self.output[ii]=input[self.index[ii]] + end + return self.output +end + +function TableSelect:updateGradInput(input, gradOutput) + assert(type(gradOutput)=='table','gradOutput must be table') + for ii=1,table.getn(input) do -- table value can't be nil, which is dangerous + 
self.gradInput[ii]=input[ii]:clone():fill(0) + end + for ii=1,table.getn(self.index) do + if gradOutput[ii]~=nil then --crop return nil + self.gradInput[self.index[ii]]:copy(gradOutput[ii]) + end + end + return self.gradInput +end + +function TableSelect:type(type) + self.gradInput = {} + self.output = {} + return parent.type(self, type) +end + +function TableSelect:forward(input) +-- rewrite forward + return self:updateOutput(input) +end + +function TableSelect:backward(input, gradOutput, scale) + scale = scale or 1 + self:updateGradInput(input, gradOutput) + self:accGradParameters(input, gradOutput, scale) + return self.gradInput +end diff --git a/TextRegionFCN_Torch/Training.lua b/TextRegionFCN_Torch/Training.lua new file mode 100644 index 0000000..13a3c82 --- /dev/null +++ b/TextRegionFCN_Torch/Training.lua @@ -0,0 +1,218 @@ +function generate_mean(trainData) + local mean_data = torch.Tensor(trainData:size(2),trainData:size(3),trainData:size(4)):fill(0) + local batchSize = 128 + local batchData = torch.Tensor(batchSize,trainData:size(2),trainData:size(3),trainData:size(4)) + local nBatch = math.floor(trainData:size(1) / batchSize) + local mean_weights = torch.Tensor(nBatch):fill(1) + if (trainData:size(1) % batchSize) == 0 then + mean_weights:fill(1) + else + nBatch = nBatch + 1 + mean_weights = torch.Tensor(nBatch):fill(1) + mean_weights[nBatch] = (trainData:size(1) % batchSize) / batchSize + end + local mean_datas = torch.Tensor(nBatch,trainData:size(2),trainData:size(3),trainData:size(4)):fill(0) + local bc = 1 + for ii=1,trainData:size(1),batchSize do + local idx_end = math.min(ii+batchSize-1,trainData:size(1)) + if(idx_end-ii+1)==batchSize then + batchData:copy(trainData[{{ii,idx_end},{},{},{}}]) + local mean_temp = torch.mean(batchData,1) + mean_datas[{bc,{},{},{}}]:copy(mean_temp) + bc = bc + 1 + else + local subBatchData=batchData:narrow(1,1,idx_end-ii+1) + subBatchData:copy(trainData[{{ii,idx_end},{},{},{}}]) + local mean_temp = 
torch.mean(subBatchData,1) + mean_datas[{bc,{},{},{}}]:copy(mean_temp) + end + end + for ii=1,nBatch do + local weight = mean_weights[ii] + local mean_tmp = mean_datas[{ii,{},{},{}}] + mean_tmp = mean_tmp:view(trainData:size(2),trainData:size(3),trainData:size(4)) + mean_data:add(weight,mean_tmp) + end + mean_data:div(torch.sum(mean_weights)) + return mean_data +end + +function train_model(model,criterion,settings) + -- read config + local dataInfo = settings.data_info + local inputInfo = settings.model_config.input_size + local optimState = settings.optimSettings + --local optimMethod = optim.adagrad + local optimMethod = optimizer_adagrad + local minLoss = math.huge + local maxPatience = settings.maxPatience + local minLearningRate = optimState.learningRate/settings.nLRChange + --local batchSize = data_info.batchSize + logging('load dataset ->\n') + local trainData = torch.load(dataInfo.train_data_dir) + local trainLabel = torch.load(dataInfo.train_gt_dir) + local testData = torch.load(dataInfo.test_data_dir) + local testLabel = torch.load(dataInfo.test_gt_dir) + local nTrain = trainData:size(1) + local nTest = trainData:size(1) + logging('dataset loaded !\n') + local dataCuda = torch.CudaTensor(dataInfo.batchSize,inputInfo[2],inputInfo[3],inputInfo[4]) + local labelCuda = torch.DoubleTensor(dataInfo.batchSize,inputInfo[3]*inputInfo[4]) + local parameters,gradParameters = model:parameters() + -- discriminate file exist, function defined in Utilities.lua + if (file_exists(dataInfo.mean_data_dir)) then + mean_data = torch.load(dataInfo.mean_data_dir); + else + -- generate mean data + mean_data = generate_mean(trainData) + torch.save(dataInfo.mean_data_dir,mean_data) + end + --local mean_data = torch.load(dataInfo.mean_data_dir); + mean_data = mean_data:float() + mean_data = image.scale(mean_data, inputInfo[4], inputInfo[3]) + logging('data mean loaded!') + --mean_data = image.scale(mean_data, inputInfo[3], inputInfo[4]); + -- whitening + function whitening(data) + 
-- can be changed, given by the detail + if data:dim() ~= 4 then + print('data muset be 4D') + end + data =data:float() + local data_tmp = torch.FloatTensor(data:size()) + for ii=1,3 do + data_tmp[{{},ii,{},{}}]:copy(data[{{},3-ii+1,{},{}}]) + end + data:copy(data_tmp) + for ii=1,data:size(1) do + data[{ii,{},{},{}}]:add(-1,mean_data) + --data[{ii,{},{},{}}]:add(data[{ii,{},{},{}}],-1,mean_data) + end + return data + end + -- get one batch + function getOneBatch() + local idx = torch.LongTensor():randperm(nTrain):narrow(1,1,dataInfo.batchSize) + + --print(idx) + local batchData = torch.FloatTensor(dataInfo.batchSize,inputInfo[2],inputInfo[3],inputInfo[4]) + local batchLabel = torch.FloatTensor(dataInfo.batchSize,labelCuda:size(2)) + for ii = 1,dataInfo.batchSize do + index = idx[{ii}] + local img = torch.Tensor(inputInfo[1],inputInfo[2],inputInfo[3],inputInfo[4]):copy(trainData[{index,{},{},{}}]) + -- whitening + img = whitening(img) + -- copy data -- copy label + batchData[{ii,{},{},{}}]:copy(img) + batchLabel[{ii,{}}]:copy(trainLabel[{index,{}}]) + end + return batchData,batchLabel + end + -- train one batch + function trainOneBatch(input,target) + local nFrame = input:size(1) + dataCuda:copy(input) + labelCuda:copy(target) + local feval = function(x) + if x ~= parameters then + --parameters:copy(x) + for ii=1,#x do + parameters[ii]:copy(x[ii]) + end + end + --gradParameters:zero() + for ii=1,#gradParameters do + gradParameters[ii]:zero() + end + local f = 0 + local output = model:forward(dataCuda) + local f = criterion:forward(output,labelCuda) + model:backward(dataCuda,criterion:backward(output,labelCuda)) + -- + for ii=1,#gradParameters do + gradParameters[ii]:div(nFrame) + end + f = f/nFrame + return f,gradParameters + end + local _,trainloss = optimMethod(feval,parameters,optimState);trainloss = trainloss[1] + return trainloss + end + -- doTest() + function doTest() + local testLoss = 0 + for t=1,testData:size(1),dataInfo.batchSize do + local idx_end = 
math.min(t+dataInfo.batchSize-1,testData:size(1)) + if(idx_end-t+1)==dataInfo.batchSize then + local imgs = torch.Tensor(dataInfo.batchSize,inputInfo[2],inputInfo[3],inputInfo[4]):copy(testData[{{t,idx_end},{},{},{}}]) + imgs = whitening(imgs) + dataCuda:copy(imgs) + labelCuda:copy(testLabel[{{t,idx_end},{}}]) + local output = model:forward(dataCuda) + testLoss = testLoss + criterion:forward(output,labelCuda) + else + local subDataCuda = dataCuda:narrow(1,1,idx_end-t+1) + local subLabelCuda= labelCuda:narrow(1,1,idx_end-t+1) + local imgs = torch.Tensor(idx_end-t+1,inputInfo[2],inputInfo[3],inputInfo[4]):copy(testData[{{t,idx_end},{},{},{}}]) + imgs = whitening(imgs) + subDataCuda:copy(imgs) + subLabelCuda:copy(testLabel[{{t,idx_end},{}}]) + local output = model:forward(subDataCuda) + testLoss = testLoss + criterion:forward(output,subLabelCuda) + end + end + testLoss = testLoss/testData:size(1) + logging(string.format('loss on test dataset ->\n%s', testLoss)) + return testLoss + end + + -- main for training + local iterations = 0 + local loss = 0 + local patience = maxPatience + while(true)do + -- train one batch + local input,target = getOneBatch() + loss = loss + trainOneBatch(input,target) + iterations = iterations + 1 + -- display the loss, at interval + if iterations % settings.displayInterval == 0 then + loss = loss / settings.displayInterval + logging(string.format('Iteration %d - train loss = %f, lr = %f', + iterations, loss, optimState.learningRate)) + loss = 0 + end + -- display the testing result + if iterations % settings.testInterval == 0 then + logging('Testing ...') + model:evaluate() + local testLoss = doTest() + logging(string.format('Test ERROR is:%f (bestErr = %f)',testLoss,minLoss)) + if testLoss < minLoss then + minLoss = testLoss + patience = maxPatience + else + patience = patience - 1 + if patience<0 then + optimState.learningRate = 0.1*optimState.learningRate + if optimState.learningRate < minLearningRate then + logging('Maximum patience 
reached,terminating') + break + end + patience = maxPatience + end + end + + model:training() + end + -- store the models + if iterations % settings.snapShotInterval == 0 then + torch.save(string.format('model_11_2/model_%d.t7', iterations),parameters) + end + if iterations >= settings.maxIterations then + logging('Maximum iterations reached, terminating ...') + break + end + end + return model +end diff --git a/TextRegionFCN_Torch/Utilities.lua b/TextRegionFCN_Torch/Utilities.lua new file mode 100644 index 0000000..bfaea32 --- /dev/null +++ b/TextRegionFCN_Torch/Utilities.lua @@ -0,0 +1,53 @@ +function setupLogger(fpath) + local fileMode = 'w' + if paths.filep(fpath) then + local input = nil + while not input do + print('Logging file exits, overwrite(o)? append(a)? abort(q)?') + input = io.read() + if input == 'o' then + fileMode = 'w' + elseif input == 'a' then + fileMode = 'a' + elseif input == 'q' then + os.exit() + else + fileMode = nil + end + end + end + gLoggerFile = io.open(fpath, fileMode) +end + +function shutdownLogger() + if gLoggerFile then + gLoggerFile:close() + end +end + +function logging(message) + local M + if type(message) == 'table' then + M = message + else + M = {message} + end + + for i = 1,#M do + local timeStamp = os.date('%x %X') + local msgFormatted = string.format('[%s] %s', timeStamp, M[i]) + print(msgFormatted) + if gLoggerFile then + gLoggerFile:write(msgFormatted .. 
'\n') + gLoggerFile:flush() + end + end +end + +function file_exists(path) + local file = io.open(path, "rb") + if file then + file:close() + end + return file ~= nil +end diff --git a/TextRegionFCN_Torch/create_model.lua b/TextRegionFCN_Torch/create_model.lua new file mode 100644 index 0000000..70b9afc --- /dev/null +++ b/TextRegionFCN_Torch/create_model.lua @@ -0,0 +1,312 @@ +function splitter(config) + local conv,deconv = nn.Sequential(),nn.Sequential() + --print(config) + for ii=1,#config do + local conf = config[ii] + --print(conf) -- fixed can add convolution + if conf[1] == 'conv' then + conv:add(nn.SpatialConvolutionMM(conf[2][1],conf[2][2],conf[2][3],conf[2][4],conf[2][5],conf[2][6],conf[2][7],conf[2][8])) + elseif conf[1]=='relu' then + conv:add(nn.ReLU(true)) + elseif conf[1]=='pool' then + conv:add(nn.SpatialMaxPooling(conf[2][1],conf[2][2],conf[2][3],conf[2][4])) + elseif conf[1]=='split' then + if conf[2] == nil then + deconv = nil + else + deconv:add(nn.SpatialConvolutionMM(conf[2][1],conf[2][2],conf[2][3],conf[2][4],conf[2][5],conf[2][6],conf[2][7],conf[2][8])) + end + end + end + if deconv == nil then + return conv + else + return conv,deconv + end +end +function upSampling(config) + local up_config,table_config = config[1],config[2] + local upSample = nn.Sequential() + upSample:add(nn.TableSelect(table_config)) + -- nn.gModule() + local input_1 = nn.Identity()() + local input_2 = nn.Identity()() + if #up_config == 1 then + local crop = nn.Crop()({input_1,input_2}):annotate{up_config[1][2]} + local up = nn.gModule({input_1,input_2},{crop}) + upSample:add(up) + return upSample + else -- add more one convolutional layer --nn.SpatialDeconvolution + local deconv = nn.SpatialFullConvolution(up_config[1][2][1],up_config[1][2][2],up_config[1][2][3],up_config[1][2][4],up_config[1][2][5],up_config[1][2][6])(input_2):annotate{up_config[1][3]} + local crop = nn.Crop()({input_1,deconv}):annotate{up_config[2][2]} + local up = 
nn.gModule({input_1,input_2},{crop}) + upSample:add(up) + return upSample + end +end + +function create_vgg_model(config) + --local vgg_weights = torch.load(config.vgg_16_weight_dir) + local vgg_weights = torch.Tensor() + if config.load_weight_flag then + vgg_weights = torch.load(config.vgg_16_weight_dir) + end + local model,model_concat = nn.Sequential(), nn.ConcatTable() + ---- pahse-1 convolution step ---- + -- split 1 + local config_1 = {{'conv',{3,64,3,3,1,1,35,35}},{'relu'},{'conv',{64,64,3,3,1,1,1,1}},{'relu'},{'split',{64, 1, 1, 1, 1, 1, 0, 0}}} + local conv1,deconv1=splitter(config_1) + -- convert parameters + if config.load_weight_flag then + local pas,gradPas = conv1:parameters() + for ii=1,2 do + local wb = vgg_weights[ii] + local w,b = wb.w.w,wb.b.b + pas[(ii-1)*2+1]:copy(w) + pas[(ii-1)*2+2]:copy(b) + end + end + -- split 2 + local config_2 = {{'pool',{2,2,2,2}},{'conv',{64, 128, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{128, 128, 3, 3, 1, 1, 1, 1}},{'relu'},{'split',{128, 1, 1, 1, 1, 1, 0, 0}}} + local conv2,deconv2=splitter(config_2) + -- convert parameters + if config.load_weight_flag then + local pas,gradPas = conv2:parameters() + for ii=1,2 do + local wb = vgg_weights[ii+2] + local w,b = wb.w.w,wb.b.b + pas[(ii-1)*2+1]:copy(w) + pas[(ii-1)*2+2]:copy(b) + end + end + -- split 3 + local config_3 = {{'pool',{2,2,2,2}},{'conv',{128, 256, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{256, 256, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{256, 256, 3, 3, 1, 1, 1, 1}},{'relu'},{'split',{256, 1, 1, 1, 1, 1, 0, 0}}} + local conv3,deconv3=splitter(config_3) + -- convert parameters + if config.load_weight_flag then + local pas,gradPas = conv3:parameters() + for ii=1,3 do + local wb = vgg_weights[ii+4] + local w,b = wb.w.w,wb.b.b + pas[(ii-1)*2+1]:copy(w) + pas[(ii-1)*2+2]:copy(b) + end + end + -- split 4 + local config_4 = {{'pool',{2,2,2,2}},{'conv',{256, 512, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{512, 512, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{512, 512, 3, 3, 1, 1, 1, 
1}},{'relu'},{'split',{512, 1, 1, 1, 1, 1, 0, 0}}} + local conv4,deconv4=splitter(config_4) + -- convert parameters + if config.load_weight_flag then + local pas,gradPas = conv4:parameters() + for ii=1,3 do + local wb = vgg_weights[ii+7] + local w,b = wb.w.w,wb.b.b + pas[(ii-1)*2+1]:copy(w) + pas[(ii-1)*2+2]:copy(b) + end + end + -- split 5 + local config_5 = {{'pool',{2,2,2,2}},{'conv',{512, 512, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{512, 512, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{512, 512, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{512, 1, 1, 1, 1, 1, 0, 0}}} + local conv5=splitter(config_5)--deconv5 is nil + if config.load_weight_flag then + local pas,gradPas = conv5:parameters() + for ii=1,3 do + local wb = vgg_weights[ii+10] + local w,b = wb.w.w,wb.b.b + pas[(ii-1)*2+1]:copy(w) + pas[(ii-1)*2+2]:copy(b) + end + end + ---- phase-2 concat step ---- + local split1,split2,split3,split4 =nn.ConcatTable(),nn.ConcatTable(),nn.ConcatTable(),nn.ConcatTable() + split4:add(conv5) + split4:add(deconv4) + conv4:add(split4) + -- + split3:add(conv4) + split3:add(deconv3) + conv3:add(split3) + -- + split2:add(conv3) + split2:add(deconv2) + conv2:add(split2) + -- + split1:add(conv2) + split1:add(deconv1) + conv1:add(split1) + ---- phase-3 add bottom_data split ---- + local bottom_data = nn.Identity() + model_concat:add(conv1) + model_concat:add(bottom_data) + model:add(model_concat) + model:add(nn.FlattenTable()) --conv3,conv2,conv1,bottom_data + if config.useGPU == 'gpu' then + model=model:cuda() + model:add(nn.TableCopy('torch.CudaTensor','torch.DoubleTensor',6))--nTable is equal to 6 + end + --return model,nil + ---- phase-4 upsampling step ---- + local config_up1 = {{{'crop','crop_1'}},{6,5}} + local upSampling1 = upSampling(config_up1) + local config_up2 = {{{'deconv',{1,1,4,4,2,2},'deconv_2'},{'crop','crop_2'}},{6,4}} + local upSampling2 = upSampling(config_up2) + local config_up3 = {{{'deconv',{1,1,8,8,4,4},'deconv_3'},{'crop','crop_3'}},{6,3}} + local upSampling3 = 
upSampling(config_up3) + local config_up4 = {{{'deconv',{1,1,16,16,8,8},'deconv_4'},{'crop','crop_4'}},{6,2}} + local upSampling4 = upSampling(config_up4) + local config_up5 = {{{'deconv',{1,1,32,32,16,16},'deconv_5'},{'crop','crop_5'}},{6,1}} + local upSampling5 = upSampling(config_up5) + local deconv = nn.ConcatTable() + deconv:add(upSampling1) + deconv:add(upSampling2) + deconv:add(upSampling3) + deconv:add(upSampling4) + deconv:add(upSampling5) + --return model,deconv + + ---- phase-5 fuse all deconv splits: initial maybe 1/5 ---- + model:add(deconv) + model:add(nn.JoinTable(2)) + local fuse = nn.SpatialConvolutionMM(5,1,1,1,1,1,0,0) + local fuse_pas,fuse_pas_grad = fuse:parameters() + fuse_pas[1]:fill(0.2) + fuse_pas[2]:fill(0) + model:add(fuse) + --model:add(nn.SpatialConvolutionMM(5,1,1,1,1,1,0,0)) + model:add(nn.Sigmoid()) + + ---- phase-6 if train, add return criterion + if config.train_test_flag == 'train' then + model:add(nn.Reshape(config.input_size[3]*config.input_size[4])) + ---- default loss function BCECriterion() + local criterion = nn.BCECriterion() + if config.class_criterion ~= 'BCECriterion' then + error('the loss function must be BCECriterion!') + end + return model,criterion + else -- test + return model + end + +end + +function create_vgg_model_small(config) + --load weights + local vgg_weights = torch.load(config.vgg_16_weight_dir) + + local model,model_concat = nn.Sequential(), nn.ConcatTable() + ---- pahse-1 convolution step ---- + -- split 1 + local config_1 = {{'conv',{3,64,3,3,1,1,35,35}},{'relu'},{'conv',{64,64,3,3,1,1,1,1}},{'relu'},{'split',{64, 1, 1, 1, 1, 1, 0, 0}}} + local conv1,deconv1=splitter(config_1) + if config.load_weight_flag then + local pas,gradPas = conv1:parameters() + for ii=1,2 do + local wb = vgg_weights[ii] + local w,b = wb.w.w,wb.b.b + pas[(ii-1)*2+1]:copy(w) + pas[(ii-1)*2+2]:copy(b) + end + end + --return conv1,deconv1 + -- split 2 + local config_2 = {{'pool',{2,2,2,2}},{'conv',{64, 128, 3, 3, 1, 1, 1, 
1}},{'relu'},{'conv',{128, 128, 3, 3, 1, 1, 1, 1}},{'relu'},{'split',{128, 1, 1, 1, 1, 1, 0, 0}}} + local conv2,deconv2=splitter(config_2) + if config.load_weight_flag then + local pas,gradPas = conv2:parameters() + for ii=1,2 do + local wb = vgg_weights[ii+2] + local w,b = wb.w.w,wb.b.b + pas[(ii-1)*2+1]:copy(w) + pas[(ii-1)*2+2]:copy(b) + end + end + -- split 3 + local config_3 = {{'pool',{2,2,2,2}},{'conv',{128, 256, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{256, 256, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{256, 256, 3, 3, 1, 1, 1, 1}},{'relu'},{'split',{256, 1, 1, 1, 1, 1, 0, 0}}} + local conv3,deconv3=splitter(config_3) + if config.load_weight_flag then + local pas,gradPas = conv3:parameters() + for ii=1,3 do + local wb = vgg_weights[ii+4] + local w,b = wb.w,wb.b + pas[(ii-1)*2+1]:copy(w) + pas[(ii-1)*2+2]:copy(b) + end + end + -- split 4 + local config_4 = {{'pool',{2,2,2,2}},{'conv',{256, 512, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{512, 512, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{512, 512, 3, 3, 1, 1, 1, 1}},{'relu'},{'conv',{512, 1, 1, 1, 1, 1, 0, 0}}} + local conv4=splitter(config_4) + if config.load_weight_flag then + local pas,gradPas = conv4:parameters() + for ii=1,3 do + local wb = vgg_weights[ii+7] + local w,b = wb.w.w,wb.b.b + pas[(ii-1)*2+1]:copy(w) + pas[(ii-1)*2+2]:copy(b) + end + end + ---- phase-2 concat step ---- + local split1,split2,split3 =nn.ConcatTable(),nn.ConcatTable(),nn.ConcatTable() + split3:add(conv4) + split3:add(deconv3) + conv3:add(split3) + -- + split2:add(conv3) + split2:add(deconv2) + conv2:add(split2) + -- + split1:add(conv2) + split1:add(deconv1) + conv1:add(split1) + ---- phase-3 add bottom_data split ---- + local bottom_data = nn.Identity() + model_concat:add(conv1) + model_concat:add(bottom_data) + model:add(model_concat) + model:add(nn.FlattenTable()) --conv3,conv2,conv1,bottom_data + if config.useGPU == 'gpu' then + model=model:cuda() + model:add(nn.TableCopy('torch.CudaTensor','torch.DoubleTensor',5))--nTable is equal to 6 + end + 
--return model,nil + ---- phase-4 upsampling step ---- + local config_up1 = {{{'crop','crop_1'}},{5,4}} + local upSampling1 = upSampling(config_up1) + local config_up2 = {{{'deconv',{1,1,4,4,2,2},'deconv_2'},{'crop','crop_2'}},{5,3}} + local upSampling2 = upSampling(config_up2) + local config_up3 = {{{'deconv',{1,1,8,8,4,4},'deconv_3'},{'crop','crop_3'}},{5,2}} + local upSampling3 = upSampling(config_up3) + local config_up4 = {{{'deconv',{1,1,16,16,8,8},'deconv_4'},{'crop','crop_4'}},{5,1}} + local upSampling4 = upSampling(config_up4) + --local config_up5 = {{{'deconv',{1,1,32,32,16,16},'deconv_5'},{'crop','crop_5'}},{6,1}} + --local upSampling5 = upSampling(config_up5) + local deconv = nn.ConcatTable() + deconv:add(upSampling1) + deconv:add(upSampling2) + deconv:add(upSampling3) + deconv:add(upSampling4) + --return model,deconv + + ---- phase-5 fuse all deconv splits ---- + model:add(deconv) + model:add(nn.JoinTable(2)) + model:add(nn.SpatialConvolutionMM(4,1,1,1,1,1,0,0)) + model:add(nn.Sigmoid()) + + ---- phase-6 if train, add return criterion + if config.train_test_flag == 'train' then + model:add(nn.Reshape(config.input_size[3]*config.input_size[4])) + ---- default loss function BCECriterion() + local criterion = nn.BCECriterion() + if config.class_criterion ~= 'BCECriterion' then + error('the loss function must be BCECriterion!') + end + return model,criterion + else -- test + return model + end + +end + + + diff --git a/TextRegionFCN_Torch/data/ilsvrc_2012_mean.t7 b/TextRegionFCN_Torch/data/ilsvrc_2012_mean.t7 new file mode 100644 index 0000000..b95b048 Binary files /dev/null and b/TextRegionFCN_Torch/data/ilsvrc_2012_mean.t7 differ diff --git a/TextRegionFCN_Torch/main.lua b/TextRegionFCN_Torch/main.lua new file mode 100644 index 0000000..66a598a --- /dev/null +++ b/TextRegionFCN_Torch/main.lua @@ -0,0 +1,70 @@ +require 'nn' +require 'cunn' +require 'nngraph' +require 'torch' +require 'cutorch' +require 'Utilities' +require 'Training' +require 'Crop' 
+require 'create_model' +require 'TableCopy' +require 'TableSelect' +require 'optimizer' +require 'image' +require 'mattorch' +--torch.setdefaulttensortype('torch.FloatTensor') +setupLogger('log.txt') -- record logs +--cutorch.setDevice(1) + +local settings = { + nClasses = 2, + --classWeight = torch.Tensor({1,1}), + displayInterval = 500, + testInterval = 50000,--50000,--3000,--1000 + snapShotInterval = 10000,--5000,--20000 + maxIterations = 2000000, + maxPatience = 5, + nLRChange = 10e3, + nBoost = 5, + model_config = { + input_size = {1,3,500,500},--{1,3,384,512},--batch format + load_weight_flag = true, + vgg_16_weight_dir = './data/vgg_weights.t7', + useGPU = 'gpu', -- or 'cpu' + train_test_flag = 'train',-- or 'test' + class_criterion = 'BCECriterion',--equal to sigmoid cross entropy loss: E=-1/n*sigma[p*logp+(1-p)log(1-p)] + }, + data_info = { + train_data_dir = '../dataset/11_2/trainData.t7', + train_gt_dir = '../dataset/11_2/trainLabel.t7', + test_data_dir = '../dataset/11_2/testData.t7', + test_gt_dir = '../dataset/11_2/testLabel.t7', + mean_data_dir = './data/ilsvrc_2012_mean.t7',--./data/synth_mean.t7', + batchSize = 1 + }, + --optimMethod = optim.adagrad + optimSettings = { + learningRate = 1e-4,-->13 * 20 + weightDecay = 5e-4, + momentum = 0.9, + learningRateDecay = 0, + }, + +} +-- create model -- +model,criterion = create_vgg_model(settings.model_config) +--model,criterion = create_vgg_model(settings.model_config) +-- load params -- +paras_init = torch.load('./model_11_2/model_init.t7') +paras,gradParas=model:parameters() +for ii=1,#paras do + paras[ii]:copy(paras_init[ii]) +end +paras_init = nil +collectgarbage() +--- +model:training() +--begin training -- +logging('begin to train model ->\n') +model = train_model(model,criterion,settings) +logging('model training over!\n') diff --git a/TextRegionFCN_Torch/model/model_final.t7 b/TextRegionFCN_Torch/model/model_final.t7 new file mode 100644 index 0000000..c802e3a Binary files /dev/null and 
b/TextRegionFCN_Torch/model/model_final.t7 differ diff --git a/TextRegionFCN_Torch/model/model_pas.t7 b/TextRegionFCN_Torch/model/model_pas.t7 new file mode 100644 index 0000000..d139d72 Binary files /dev/null and b/TextRegionFCN_Torch/model/model_pas.t7 differ diff --git a/TextRegionFCN_Torch/optimizer.lua b/TextRegionFCN_Torch/optimizer.lua new file mode 100644 index 0000000..089ddae --- /dev/null +++ b/TextRegionFCN_Torch/optimizer.lua @@ -0,0 +1,44 @@ +function optimizer_adagrad(opfunc, x, config, state) +--opfunc: outputs including loss(fx),grad_loss(df/dx); x:parameters; + -- (0) get/update state + if config == nil and state == nil then + print('no state table, ADAGRAD initializing') + end + local config = config or {} + local state = state or config + local lr = config.learningRate or 1e-3 + local lrd = config.learningRateDecay or 0 + state.evalCounter = state.evalCounter or 0 + local nevals = state.evalCounter + + -- (1) evaluate f(x) and df/dx + local fx,dfdx = opfunc(x) + + -- (3) learning rate decay (annealing) + local clr = lr / (1 + nevals*lrd) + + -- (4) parameter update with single or individual learning rates + if not state.paramVariance then + state.paramVariance = {} + state.paramStd = {} + for ii=1,#x do + state.paramVariance[ii] = torch.Tensor():typeAs(x[ii]):resizeAs(dfdx[ii]):zero() + state.paramStd[ii] = torch.Tensor():typeAs(x[ii]):resizeAs(dfdx[ii]) + end + end + for ii=1,#x do + state.paramVariance[ii]:addcmul(1,dfdx[ii],dfdx[ii]) + state.paramStd[ii]:resizeAs(state.paramVariance[ii]):copy(state.paramVariance[ii]):sqrt() + if ii<=26 then + x[ii]:addcdiv(-clr, dfdx[ii],state.paramStd[ii]:add(1e-10)) + else + x[ii]:addcdiv(-clr*1e1, dfdx[ii],state.paramStd[ii]:add(1e-10)) + end + end + + -- (5) update evaluation counter + state.evalCounter = state.evalCounter + 1 + + -- return x*, f(x) before optimization + return x,{fx} +end \ No newline at end of file diff --git a/TextRegionFCN_Torch/test.lua b/TextRegionFCN_Torch/test.lua new file mode 
require 'nn'
require 'cunn'
require 'nngraph'
require 'torch'
require 'cutorch'
require 'Utilities'
require 'Training'
require 'Crop'
require 'create_model'
require 'TableCopy'
require 'TableSelect'
require 'optimizer'
require 'image'
require 'mattorch'
torch.setnumthreads(2)
--cutorch.setDevice(2)

-- Multi-scale test driver: runs the trained FCN over every image in each
-- configured directory, once per entry in settings.heights, and saves the
-- network output of each scale as '<name>_<scaleIndex>.jpg' in res_dir.
local settings = {
    nClasses = 2,
    model_dir = './model/model_final.t7',
    -- img_dir = {'/home/mkyang/dataset/ICDAR2013/Challenge2_Test_Task12_Images/', '/home/mkyang/dataset/ICDAR2015/ch4_test_images/', '/home/mkyang/dataset/MSRA/im/'},
    -- res_dir = {'./results/icdar13/', './results/icdar15/', './results/msra/'},
    -- img_dir = {'/home/mkyang/dataset/ICDAR2015/ch4_test_images/', '/home/mkyang/dataset/MSRA/im/'},
    -- res_dir = {'./results/icdar15/', './results/msra/'},
    img_dir = {'/share/datasets/SceneText/scene_text/MSRA-TD500/test/'},
    res_dir = {'./results/msra_test/'},
    model_config = {
        input_size = {1,3,500,500},--{1,3,384,512},--batch format
        load_weight_flag = false,
        useGPU = 'gpu', -- or 'cpu'
        train_test_flag = 'test',-- or 'test'
        class_criterion = 'BCECriterion',
    },
    data_info = {
        mean_data_dir = './data/ilsvrc_2012_mean.t7',--./data/synth_mean.t7',
        batchSize = 1
    },
    heights = {200,500,1000}--,1200} -- {200,400,800} --{360,720,1080}
}
local mean_data = torch.load(settings.data_info.mean_data_dir);

-- List image file names under `path` whose names match 'JPG'.
-- NOTE(review): relies on `ls | grep JPG`, so lowercase '.jpg' files are
-- skipped — confirm this matches the dataset's naming before reuse.
function list_img(path)
    local i, t, popen = 0, {}, io.popen
    for file_name in popen('ls -a ' .. path .. ' |grep JPG'):lines() do
        i = i + 1
        t[i] = file_name
    end
    return t
end

-- Mean-subtract and channel-swap a 1x3xHxW batch in place.
-- Swaps channel order (RGB <-> BGR, to match the caffe-style mean file —
-- TODO confirm) and subtracts the dataset mean rescaled to the input size.
-- Expects pixel values on the same scale as mean_data (caller multiplies
-- by 255 before the call). Returns the whitened FloatTensor.
function whitening(data)
    if data:dim() ~= 4 then
        -- fail fast: the code below indexes dims 1..4 and would crash anyway
        error('data must be 4D')
    end
    data = data:float()
    local mean = image.scale(mean_data, data:size(4), data:size(3))
    mean = mean:view(1, data:size(2), data:size(3), data:size(4))
    -- reverse the channel axis
    local data_tmp = torch.FloatTensor(data:size())
    for ii = 1, 3 do
        data_tmp[{{}, ii, {}, {}}]:copy(data[{{}, 3 - ii + 1, {}, {}}])
    end
    data:copy(data_tmp)
    for ii = 1, data:size(1) do
        data[{ii, {}, {}, {}}]:add(-1, mean)
    end
    return data
end

-- create model and load the trained parameters --
model = create_vgg_model(settings.model_config)
local trained_pas = torch.load(settings.model_dir)
local pas, gradPas = model:parameters()
for ii = 1, #pas do
    pas[ii]:copy(trained_pas[ii])
end
model:evaluate()

---- run the model over every configured test set ----
for i = 1, table.getn(settings.img_dir) do

    local img_dir = settings.img_dir[i]
    local res_dir = settings.res_dir[i]
    print('begin evaluate on ' .. img_dir)
    local img_names = list_img(img_dir)
    local nImg = table.getn(img_names)

    print(nImg)
    for ii = 1, nImg do
        local src = image.load(img_dir .. img_names[ii])
        local src_h, src_w = src:size(2), src:size(3)
        for jj = 1, table.getn(settings.heights) do
            -- `local` added: h and w previously leaked as globals
            local h, w
            -- fit the longer side to the configured size, keep aspect ratio
            if src_h > src_w then
                h = settings.heights[jj]
                w = math.floor(h * src_w / src_h)
            else
                w = settings.heights[jj]
                h = math.floor(w * src_h / src_w)
            end

            local img = image.scale(src, w, h)
            -- preparation: to 0-255 range, batch shape, mean-subtract
            local input = img:clone():mul(255):view(1, 3, h, w)
            input = whitening(input):double()
            -- forward
            local output = model:forward(input:cuda()):clone():float():view(1, input:size(3), input:size(4))
            -- save one map per scale; strips the 4-char extension ('.JPG')
            local save_path = res_dir .. string.sub(img_names[ii], 1, -5) .. '_' .. jj .. '.jpg'
            image.save(save_path, output)
        end
        print(ii .. '/' .. nImg)
    end
    print('over!')
end