Skip to content

Commit 65f2bc1

Browse files
authored
KOPTContext: add comic panel detection function/additional leptonica declarations (koreader#1159)
Used to find manga/comic panels. It uses connected component labeling for detection. The downside of this algorithm is that panels joined by text, "word clouds", etc. are detected as one entity. Completely open panels (those without borders) may sometimes fail to register.
1 parent f146a4f commit 65f2bc1

File tree

3 files changed

+100
-0
lines changed

3 files changed

+100
-0
lines changed

ffi-cdecl/leptonica_cdecl.c

+17
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ cdecl_type(l_float64)
1414

1515
cdecl_struct(Box)
1616
cdecl_type(BOX)
17+
cdecl_struct(Sel)
18+
cdecl_type(Sel)
19+
1720
cdecl_struct(Boxa)
1821
cdecl_type(BOXA)
1922
cdecl_struct(Numa)
@@ -35,6 +38,7 @@ cdecl_func(numaCreate)
3538
cdecl_func(numaCreateFromFArray)
3639
cdecl_func(boxaCombineOverlaps)
3740
cdecl_func(boxaClipToBox)
41+
cdecl_func(pixConnCompBB)
3842
cdecl_func(boxCopy)
3943
cdecl_func(boxClone)
4044
cdecl_func(boxOverlapRegion)
@@ -58,12 +62,25 @@ cdecl_func(pixGetWpl)
5862
cdecl_func(pixSetPixel)
5963
cdecl_func(pixGetData)
6064
cdecl_func(pixCreate)
65+
cdecl_func(pixClone)
6166
cdecl_func(pixConvertTo1)
6267
cdecl_func(pixThresholdToBinary)
68+
cdecl_func(pixConvertRGBToGrayFast)
6369
cdecl_func(pixConvertTo32)
6470
cdecl_func(pixDrawBoxaRandom)
6571
cdecl_func(pixMultiplyByColor)
6672
cdecl_func(pixBlendBackgroundToColor)
73+
cdecl_func(pixBlockconv)
74+
cdecl_func(pixRenderContours)
75+
cdecl_func(pixInvert)
76+
cdecl_func(pixClipRectangle)
77+
cdecl_func(pixOpen)
78+
cdecl_func(pixClose)
79+
cdecl_func(pixErode)
6780
cdecl_func(pixGetRegionsBinary)
6881
cdecl_func(pixSplitIntoBoxa)
6982
cdecl_func(pixReduceRankBinaryCascade)
83+
cdecl_func(selCreate)
84+
cdecl_func(selSetElement)
85+
cdecl_func(selPrintToString)
86+
cdecl_func(selDestroy)

ffi/koptcontext.lua

+60
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,66 @@ function KOPTContext_mt.__index:findPageBlocks()
257257
end
258258
end
259259

260+
--[[
-- get the comic/manga panel that contains position pos
--
-- The source bitmap is binarized and its 8-connected components are
-- extracted; the first component that is at least 1/8 of the page in both
-- dimensions and strictly contains pos is reported.
--
-- @param pos table with fields x and y, compared directly against the
--            component bounding boxes (i.e. source bitmap pixel coordinates)
-- @return table {x, y, w, h} describing the panel, or nil when no panel
--         contains pos (or when self.src.data is falsy)
--]]
function KOPTContext_mt.__index:getPanelFromPage(pos)
    -- NOTE(review): if src.data is an FFI pointer, a NULL pointer is still
    -- truthy in LuaJIT; confirm whether an explicit "== nil" test is wanted.
    if not self.src.data then return end

    -- leptonica destructors take the address of the handle
    local function destroyPix(pix)
        leptonica.pixDestroy(ffi.new("PIX *[1]", pix))
    end

    local pixs = k2pdfopt.bitmap2pix(self.src, 0, 0, self.src.width, self.src.height)
    local pixg
    if leptonica.pixGetDepth(pixs) == 32 then
        pixg = leptonica.pixConvertRGBToGrayFast(pixs)
    else
        pixg = leptonica.pixClone(pixs)
    end

    -- leptonica's threshold gets pixels lighter than X, we want to get
    -- pixels darker than X, to do that we invert the image, threshold it,
    -- and invert the result back. Math: ~(~img < X) <=> img > X
    local pix_inverted = leptonica.pixInvert(nil, pixg)
    local pix_thresholded = leptonica.pixThresholdToBinary(pix_inverted, 50)
    leptonica.pixInvert(pix_thresholded, pix_thresholded)

    -- find connected components (in our case panels)
    local bb = leptonica.pixConnCompBB(pix_thresholded, 8)

    -- anything smaller than 1/8 of the page is treated as a fragment of a
    -- panel (text, speech bubble, ...) rather than a panel of its own
    local min_w = leptonica.pixGetWidth(pixs) / 8
    local min_h = leptonica.pixGetHeight(pixs) / 8
    local res

    for i = 0, leptonica.boxaGetCount(bb) - 1 do
        local box = leptonica.boxaGetBox(bb, i, C.L_CLONE)
        -- Copy the geometry into Lua numbers so the clone can be released
        -- right away. The boxes come from an image with the same size as
        -- pixs, so they need no clipping against it.
        local x, y, w, h = box.x, box.y, box.w, box.h
        leptonica.boxDestroy(ffi.new("BOX *[1]", box))

        if w >= min_w and h >= min_h
            and x < pos.x and y < pos.y
            and x + w > pos.x and y + h > pos.y then
            res = { x = x, y = y, w = w, h = h }
            break -- we found the panel, stop scanning
        end
    end

    -- free up memory (pix_inverted included: it is a separate image
    -- allocated by pixInvert(nil, ...))
    leptonica.boxaDestroy(ffi.new("BOXA *[1]", bb))
    destroyPix(pix_thresholded)
    destroyPix(pix_inverted)
    destroyPix(pixg)
    destroyPix(pixs)
    return res
end
319+
260320
--[[
261321
-- get page block in location x, y both of which in range [0, 1] relative to page
262322
-- width and height respectively

ffi/leptonica_h.lua

+23
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,23 @@ struct Pix {
5959
l_uint32 *data;
6060
};
6161
typedef struct Pix PIX;
62+
struct Sel {
63+
l_int32 sx;
64+
l_int32 sy;
65+
l_int32 cx;
66+
l_int32 cy;
67+
l_uint32 **data;
68+
char *name;
69+
};
70+
typedef struct Sel SEL;
6271
BOX *boxCreate(l_int32, l_int32, l_int32, l_int32);
6372
BOXA *boxaCreate(l_int32);
6473
BOX *boxaGetBox(BOXA *, l_int32, l_int32);
6574
NUMA *numaCreate(l_int32);
6675
NUMA *numaCreateFromFArray(l_float32 *, l_int32, l_int32);
6776
BOXA *boxaCombineOverlaps(BOXA *);
6877
BOXA *boxaClipToBox(BOXA *, BOX *);
78+
BOXA *pixConnCompBB(PIX *, l_uint32);
6979
BOX *boxCopy(BOX *);
7080
BOX *boxClone(BOX *);
7181
BOX *boxOverlapRegion(BOX *, BOX *);
@@ -89,13 +99,26 @@ l_int32 pixGetWpl(PIX *);
8999
l_int32 pixSetPixel(PIX *, l_int32, l_int32, l_uint32);
90100
l_uint32 *pixGetData(PIX *);
91101
PIX *pixCreate(l_int32, l_int32, l_int32);
102+
PIX *pixClone(PIX *);
92103
PIX *pixConvertTo1(PIX *, l_int32);
93104
PIX *pixThresholdToBinary(PIX *, l_int32);
105+
PIX *pixConvertRGBToGrayFast(PIX *);
94106
PIX *pixConvertTo32(PIX *);
95107
PIX *pixDrawBoxaRandom(PIX *, BOXA *, l_int32);
96108
PIX *pixMultiplyByColor(PIX *, PIX *, BOX *, l_uint32);
97109
PIX *pixBlendBackgroundToColor(PIX *, PIX *, BOX *, l_uint32, l_float32, l_int32, l_int32);
110+
PIX *pixBlockconv(PIX *, l_uint32, l_uint32);
111+
PIX *pixRenderContours(PIX *, l_uint32, l_uint32, l_uint32);
112+
PIX *pixInvert(PIX *, PIX *);
113+
PIX *pixClipRectangle(PIX *, BOX *, BOX **);
114+
PIX *pixOpen(PIX *, PIX *, SEL *);
115+
PIX *pixClose(PIX *, PIX *, SEL *);
116+
PIX *pixErode(PIX *, PIX *, SEL *);
98117
l_int32 pixGetRegionsBinary(PIX *, PIX **, PIX **, PIX **, struct Pixa *);
99118
BOXA *pixSplitIntoBoxa(PIX *, l_int32, l_int32, l_int32, l_int32, l_int32, l_int32);
100119
PIX *pixReduceRankBinaryCascade(PIX *, l_int32, l_int32, l_int32, l_int32);
120+
SEL *selCreate(l_int32, l_int32, char *);
121+
l_int32 selSetElement(SEL *, l_int32, l_int32, l_int32);
122+
char *selPrintToString(SEL *);
123+
void selDestroy(SEL **);
101124
]]

0 commit comments

Comments
 (0)