16
16
along with this program. If not, see <http://www.gnu.org/licenses/>.
17
17
*/
18
18
19
+ #include <assert.h>
19
20
#include "koptcontext.h"
20
21
21
22
static int newKOPTContext (lua_State * L ) {
@@ -47,6 +48,7 @@ static int newKOPTContext(lua_State *L) {
47
48
48
49
BBox bbox = {0 , 0 , 0 , 0 };
49
50
int precache = 0 ;
51
+ int debug = 0 ;
50
52
51
53
KOPTContext * kc = (KOPTContext * ) lua_newuserdata (L , sizeof (KOPTContext ));
52
54
@@ -78,6 +80,10 @@ static int newKOPTContext(lua_State *L) {
78
80
79
81
kc -> bbox = bbox ;
80
82
kc -> precache = precache ;
83
+ kc -> debug = debug ;
84
+
85
+ kc -> boxa = NULL ;
86
+ kc -> nai = NULL ;
81
87
82
88
bmp_init (& kc -> src );
83
89
bmp_init (& kc -> dst );
@@ -95,6 +101,8 @@ static int freeContext(lua_State *L) {
95
101
* bitmap since the usage of dst bitmap is delayed most of the times.
96
102
*/
97
103
bmp_free (& kc -> dst );
104
+ boxaDestroy (& kc -> boxa );
105
+ numaDestroy (& kc -> nai );
98
106
return 0 ;
99
107
}
100
108
@@ -249,6 +257,123 @@ static int kcIsPreCache(lua_State *L) {
249
257
return 1 ;
250
258
}
251
259
260
+ static int kcSetDebug (lua_State * L ) {
261
+ KOPTContext * kc = (KOPTContext * ) luaL_checkudata (L , 1 , "koptcontext" );
262
+ kc -> debug = 1 ;
263
+ return 0 ;
264
+ }
265
+
266
+ static int kcGetWordBoxes (lua_State * L ) {
267
+ KOPTContext * kc = (KOPTContext * ) luaL_checkudata (L , 1 , "koptcontext" );
268
+ int x = luaL_checkint (L , 2 );
269
+ int y = luaL_checkint (L , 3 );
270
+ int w = luaL_checkint (L , 4 );
271
+ int h = luaL_checkint (L , 5 );
272
+ BOX * box ;
273
+ l_float32 max_val ;
274
+ int nr_line , last_index , nr_word , current_line ;
275
+ int counter_l , counter_w , counter_cw ;
276
+ int l_x0 , l_y0 , l_x1 , l_y1 ;
277
+
278
+ k2pdfopt_get_word_boxes (kc , & kc -> dst , x , y , w , h , 1 , 10 , 10 , 300 , 100 );
279
+ /* get number of lines in this area */
280
+ numaGetMax (kc -> nai , & max_val , & last_index );
281
+ nr_line = (int ) max_val ;
282
+ /* get number of lines in this area */
283
+ nr_word = boxaGetCount (kc -> boxa );
284
+ assert (nr_word == numaGetCount (kc -> nai ));
285
+ /* table that contains all the words */
286
+ lua_newtable (L );
287
+ lua_pushstring (L , "box_only" );
288
+ lua_pushnumber (L , 1 );
289
+ lua_settable (L , -3 );
290
+ for (counter_w = 0 ; counter_w < nr_word ; counter_w ++ ) {
291
+ numaGetIValue (kc -> nai , counter_w , & counter_l );
292
+ current_line = counter_l ;
293
+ /* subtable that contains words in a line */
294
+ lua_pushnumber (L , counter_l + 1 );
295
+ lua_newtable (L );
296
+ counter_cw = 0 ;
297
+ l_y0 = l_x0 = 9999 ;
298
+ l_x1 = l_y1 = 0 ;
299
+ while (current_line == counter_l && counter_w < nr_word ) {
300
+ box = boxaGetBox (kc -> boxa , counter_w , L_CLONE );
301
+ /* create table that contains box for a word */
302
+ lua_pushnumber (L , counter_cw + 1 );
303
+ lua_newtable (L );
304
+ counter_w ++ ;
305
+ counter_cw ++ ;
306
+
307
+ /* update line box */
308
+ l_x0 = box -> x < l_x0 ? box -> x : l_x0 ;
309
+ l_y0 = box -> y < l_y0 ? box -> y : l_y0 ;
310
+ l_x1 = box -> x + box -> w > l_x1 ? box -> x + box -> w : l_x1 ;
311
+ l_y1 = box -> y + box -> h > l_y1 ? box -> y + box -> h : l_y1 ;
312
+
313
+ /* set word box */
314
+ lua_pushstring (L , "x0" );
315
+ lua_pushnumber (L , box -> x );
316
+ lua_settable (L , -3 );
317
+
318
+ lua_pushstring (L , "y0" );
319
+ lua_pushnumber (L , box -> y );
320
+ lua_settable (L , -3 );
321
+
322
+ lua_pushstring (L , "x1" );
323
+ lua_pushnumber (L , box -> x + box -> w );
324
+ lua_settable (L , -3 );
325
+
326
+ lua_pushstring (L , "y1" );
327
+ lua_pushnumber (L , box -> y + box -> h );
328
+ lua_settable (L , -3 );
329
+
330
+ //printf("box %d:%d,%d,%d,%d\n",counter_w,box->x,box->y,box->w,box->h);
331
+ /* set word entry to line subtable */
332
+ lua_settable (L , -3 );
333
+ if (counter_w < nr_word )
334
+ numaGetIValue (kc -> nai , counter_w , & counter_l );
335
+ } /* end of while */
336
+ if (current_line != counter_l ) counter_w -- ;
337
+ /* box for a whole line */
338
+ lua_pushstring (L , "x0" );
339
+ lua_pushnumber (L , l_x0 );
340
+ lua_settable (L , -3 );
341
+ lua_pushstring (L , "y0" );
342
+ lua_pushnumber (L , l_y0 );
343
+ lua_settable (L , -3 );
344
+ lua_pushstring (L , "x1" );
345
+ lua_pushnumber (L , l_x1 );
346
+ lua_settable (L , -3 );
347
+ lua_pushstring (L , "y1" );
348
+ lua_pushnumber (L , l_y1 );
349
+ lua_settable (L , -3 );
350
+ /* set line entry to box table */
351
+ lua_settable (L , -3 );
352
+ } /* end of for */
353
+
354
+ return 1 ;
355
+ }
356
+
357
+ static int kcGetOCRWord (lua_State * L ) {
358
+ KOPTContext * kc = (KOPTContext * ) luaL_checkudata (L , 1 , "koptcontext" );
359
+ const char * datadir = luaL_checkstring (L , 2 );
360
+ const char * lang = luaL_checkstring (L , 3 );
361
+ int x = luaL_checkint (L , 4 );
362
+ int y = luaL_checkint (L , 5 );
363
+ int w = luaL_checkint (L , 6 );
364
+ int h = luaL_checkint (L , 7 );
365
+ char word [256 ];
366
+
367
+ ocrtess_init (datadir , lang , 3 , NULL );
368
+ ocrtess_single_word_from_bmp8 (
369
+ word , 255 , & kc -> dst ,
370
+ x , y , x + w , y + h , 3 , 0 , 1 , NULL );
371
+ ocrtess_end ();
372
+
373
+ lua_pushstring (L , word );
374
+ return 1 ;
375
+ }
376
+
252
377
static const struct luaL_Reg koptcontext_meth [] = {
253
378
{"setBBox" , kcSetBBox },
254
379
{"setTrim" , kcSetTrim },
@@ -276,6 +401,10 @@ static const struct luaL_Reg koptcontext_meth[] = {
276
401
277
402
{"setPreCache" , kcSetPreCache },
278
403
{"isPreCache" , kcIsPreCache },
404
+ {"setDebug" , kcSetDebug },
405
+
406
+ {"getWordBoxes" , kcGetWordBoxes },
407
+ {"getOCRWord" , kcGetOCRWord },
279
408
280
409
{"free" , freeContext },
281
410
{"__gc" , freeContext },
0 commit comments