Skip to content

Commit fb12297

Browse files
rickrick
authored and committed
add gumbo parser and test case
1 parent dd47bd5 commit fb12297

File tree

19 files changed

+231
-3
lines changed

19 files changed

+231
-3
lines changed

.gitmodules

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
url = https://github.com/sagiegurari/c_stringfn
5858
[submodule "submodules/c_timer"]
5959
path = submodules/c_timer
60-
url = https://github.com/binRick/c_timer
60+
url = ssh://git@github.com/binRick/c_timer
6161
[submodule "submodules/cargs"]
6262
path = submodules/cargs
6363
url = https://github.com/likle/cargs
@@ -325,3 +325,9 @@
325325
[submodule "submodules/submodules/generic-print"]
326326
path = submodules/submodules/generic-print
327327
url = https://github.com/exebook/generic-print.git
328+
[submodule "submodules/gumbo-parser"]
329+
path = submodules/gumbo-parser
330+
url = https://github.com/google/gumbo-parser
331+
[submodule "submodules/wrapdb"]
332+
path = submodules/wrapdb
333+
url = https://github.com/mesonbuild/wrapdb

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ TESTS_LIST_LOG_FILE = $(shell pwd)/.tests-list.log
1616
TESTS_SUITES_LOG_FILE=$(shell pwd)/.tests-suites.log
1717
TESTS_TESTS_LOG_FILE=$(shell pwd)/.tests-tests.log
1818
##############################################################
19-
TIDIED_FILES = deps*/*.c deps*/*.h term*/*.c term*/*.h ctable*/*.c ctable*/*.h *table*/*.c *table*/*.h
19+
TIDIED_FILES = deps*/*.c deps*/*.h term*/*.c term*/*.h ctable*/*.c ctable*/*.h *table*/*.c *table*/*.h gumbo-test/*.c gumbo-test/*.h
2020
TRIGGER_FILE=.trigger.c
2121
##############################################################
2222
do-setup:

gumbo-test/gumbo-test.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#include "gumbo-test.h"
2+
#include <assert.h>
3+
#include <stdio.h>
4+
#include <stdlib.h>
5+
#include <sys/stat.h>
6+
7+
8+
/*
 * Read up to the stat-reported size of `fp` into a freshly allocated buffer.
 *
 * fp      open stream; its size is taken from fstat() on the underlying
 *         file descriptor.
 * output  receives the malloc()ed buffer; the caller is responsible for
 *         free()ing it.  The buffer is NUL-terminated one byte past the data.
 * length  receives the number of bytes actually read (terminator excluded).
 *
 * Any failure (fstat, a size that does not fit in an int, allocation, read
 * error) prints a message to stderr and terminates with EXIT_FAILURE.
 */
static void read_file(FILE *fp, char **output, int *length) {
  struct stat filestats;

  if (fstat(fileno(fp), &filestats) != 0) {
    perror("fstat");
    exit(EXIT_FAILURE);
  }

  /* The length is reported through an int, so reject sizes it cannot hold. */
  long long file_size = (long long)filestats.st_size;
  int       capacity  = (int)file_size;
  if (file_size < 0 || (long long)capacity != file_size) {
    fprintf(stderr, "read_file: file size does not fit in an int\n");
    exit(EXIT_FAILURE);
  }

  /* One extra byte so the data can always be NUL-terminated. */
  char *buffer = malloc((size_t)capacity + 1);
  if (buffer == NULL) {
    fprintf(stderr, "read_file: out of memory\n");
    exit(EXIT_FAILURE);
  }

  size_t total = 0;
  size_t chunk;
  while (total < (size_t)capacity
         && (chunk = fread(buffer + total, 1, (size_t)capacity - total, fp)) > 0) {
    total += chunk;
  }
  if (ferror(fp)) {
    fprintf(stderr, "read_file: read error\n");
    free(buffer);
    exit(EXIT_FAILURE);
  }

  /* Report what was really read: a short read must not expose the
   * uninitialised tail of the buffer to the caller. */
  buffer[total] = '\0';
  *output       = buffer;
  *length       = (int)total;
}
22+
23+
24+
static const char * find_title(const GumboNode *root) {
25+
assert(root->type == GUMBO_NODE_ELEMENT);
26+
assert(root->v.element.children.length >= 2);
27+
28+
const GumboVector *root_children = &root->v.element.children;
29+
GumboNode *head = NULL;
30+
for (int i = 0; i < root_children->length; ++i) {
31+
GumboNode *child = root_children->data[i];
32+
if (child->type == GUMBO_NODE_ELEMENT
33+
&& child->v.element.tag == GUMBO_TAG_HEAD) {
34+
head = child;
35+
break;
36+
}
37+
}
38+
assert(head != NULL);
39+
40+
GumboVector *head_children = &head->v.element.children;
41+
for (int i = 0; i < head_children->length; ++i) {
42+
GumboNode *child = head_children->data[i];
43+
if (child->type == GUMBO_NODE_ELEMENT
44+
&& child->v.element.tag == GUMBO_TAG_TITLE) {
45+
if (child->v.element.children.length != 1) {
46+
return("<empty title>");
47+
}
48+
GumboNode *title_text = child->v.element.children.data[0];
49+
assert(title_text->type == GUMBO_NODE_TEXT || title_text->type == GUMBO_NODE_WHITESPACE);
50+
return(title_text->v.text.text);
51+
}
52+
}
53+
return("<no title found>");
54+
}
55+
56+
57+
int gmain(int argc, const char **argv) {
58+
if (argc != 2) {
59+
printf("Usage: get_title <html filename>.\n");
60+
exit(EXIT_FAILURE);
61+
}
62+
const char *filename = argv[1];
63+
64+
FILE *fp = fopen(filename, "r");
65+
if (!fp) {
66+
printf("File %s not found!\n", filename);
67+
exit(EXIT_FAILURE);
68+
}
69+
70+
char *input;
71+
int input_length;
72+
read_file(fp, &input, &input_length);
73+
GumboOutput *output = gumbo_parse_with_options(
74+
&kGumboDefaultOptions, input, input_length);
75+
const char *title = find_title(output->root);
76+
printf("%s\n", title);
77+
gumbo_destroy_output(&kGumboDefaultOptions, output);
78+
free(input);
79+
}
80+
81+
82+
int main(int argc, char **argv) {
83+
(void)argc; (void)argv;
84+
if ((argc >= 2) && (strcmp(argv[1], "--test") == 0)) {
85+
printf("Test OK\n"); return(0);
86+
}
87+
GumboOutput *output = gumbo_parse("<h1>Hello, World!</h1>");
88+
gumbo_destroy_output(&kGumboDefaultOptions, output);
89+
return(gmain(argc, argv));
90+
91+
92+
return(0);
93+
}

gumbo-test/gumbo-test.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#pragma once
2+
#include "cargs/include/cargs.h"
3+
#include "submodules/gumbo-parser/src/gumbo.h"
4+
#include <ctype.h>
5+
#include <stdarg.h>
6+
#include <stdbool.h>
7+
#include <stdbool.h>
8+
#include <stdint.h>
9+
#include <stdint.h>
10+
#include <stdio.h>
11+
#include <stdio.h>
12+
#include <stdlib.h>
13+
#include <stdlib.h>
14+
#include <string.h>
15+
#include <string.h>
16+
#include <sys/time.h>
17+

gumbo-test/meson.build

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
gumbo_test_srcs = [
2+
'gumbo-test.c',
3+
]
4+
gumbo_test_inc = [
5+
inc,
6+
]
7+
gumbo_test_deps = [
8+
gumbo_dep,
9+
]
10+
11+
if get_option('enable-binaries')
12+
gumbo_test_exec = executable('gumbo-test',
13+
gumbo_test_srcs,
14+
dependencies: gumbo_test_deps,
15+
include_directories: gumbo_test_inc,
16+
)
17+
if get_option('enable-binary-tests')
18+
test('gumbo-test', gumbo_test_exec, args: ['--test'])
19+
endif
20+
endif

meson.build

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ inc = [
6161
include_directories('.'),
6262
]
6363
###########################################
64+
subdir('meson/deps/gumbo')
6465
subdir('meson')
6566
###########################################
6667
subdir('deps')
@@ -72,3 +73,5 @@ subdir('term-tests-test')
7273
subdir('ctable')
7374
subdir('tests-table')
7475
###########################################
76+
subdir('gumbo-test')
77+
###########################################
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
embedded_palettes_lib = library('embedded_palettes',
2-
'../../../embeds/tbl1.c',
2+
meson_deps_dir+'/submodules/c_palettes/embeds/tbl1.c',
33
install: false,
44
dependencies: [],
55
include_directories: [
6+
inc
67
]
78
)
89
embedded_palettes_dep = declare_dependency(include_directories:
910
[
11+
inc
1012
],
1113
link_with: embedded_palettes_lib)

meson/deps/gumbo/meson.build

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#gumbo_dep = dependency('libgumbo',fallback:['libgumbo', 'gumbo_dep'], required: true)
2+
gumbo_dep = cc.find_library('gumbo', required: true)
3+
#libspng_lib = library('libspng',
4+
# meson_deps_dir + '/submodules/libspng/spng/spng.c',
5+
# install: false,
6+
# dependencies: [
7+
# ],
8+
# include_directories: [
9+
# inc,
10+
# ]
11+
#)
12+
#libspng_dep = declare_dependency(include_directories: [inc], link_with: libspng_lib)

meson/deps/libuv/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
libuv_dep = dependency('libuv',fallback:['libuv', 'libuv_dep'])

meson/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,5 @@ subdir('deps/generic_print')
4343
#subdir('deps/occurrences')
4444
subdir('deps/libspinner')
4545
subdir('deps/ctable')
46+
subdir('deps/libuv')
4647
####################################

0 commit comments

Comments
 (0)