Skip to content
This repository was archived by the owner on Feb 15, 2023. It is now read-only.

Commit b8e2f45

Browse files
author
Vicent Marti
committed
parser: Simplify the element_in_specific_scope calls
The old implementation used two tagsets and wasted stack space: it allocated two whole sets even though one of them always contained a single tag element. Because the `expected` elements must always be in the HTML namespace, we can simplify these APIs by passing a plain array of tags instead, which avoids allocating so much space on the stack.
1 parent e826905 commit b8e2f45

File tree

1 file changed

+31
-40
lines changed

1 file changed

+31
-40
lines changed

src/parser.c

Lines changed: 31 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1313,40 +1313,38 @@ static GumboQuirksModeEnum compute_quirks_mode(
13131313
// names. For example, "has an element in list scope" looks for an element of
13141314
// the given qualified name within the nearest enclosing <ol> or <ul>, along
13151315
// with a bunch of generic element types that serve to "firewall" their content
1316-
// from the rest of the document.
1317-
static bool has_an_element_in_specific_scope(GumboParser* parser, gumbo_tagset expected, bool negate, const gumbo_tagset tags) {
1316+
// from the rest of the document. Note that because of the way the spec is written,
1317+
// the `expected` tags are only ever matched against elements in the HTML namespace.
1318+
static bool has_an_element_in_specific_scope(GumboParser* parser,
1319+
int expected_size, const GumboTag *expected, bool negate, const gumbo_tagset tags) {
13181320
GumboVector* open_elements = &parser->_parser_state->_open_elements;
13191321
for (int i = open_elements->length; --i >= 0; ) {
13201322
const GumboNode* node = open_elements->data[i];
1321-
if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
1323+
if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
13221324
continue;
1325+
1326+
GumboTag node_tag = node->v.element.tag;
1327+
GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
1328+
for (int j = 0; j < expected_size; ++j) {
1329+
if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
1330+
return true;
13231331
}
1324-
if (TAGSET_INCLUDES(expected, node->v.element.tag_namespace, node->v.element.tag)) {
1325-
return true;
1326-
}
1327-
bool found_qualname = false;
1328-
if (TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag)) {
1329-
found_qualname = true;
1330-
}
1331-
if (negate != found_qualname) {
1332+
1333+
bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
1334+
if (negate != found)
13321335
return false;
1333-
}
13341336
}
13351337
return false;
13361338
}
13371339

13381340
// Checks for the presence of an open element of the specified tag type.
13391341
static bool has_open_element(GumboParser* parser, GumboTag tag) {
1340-
gumbo_tagset qualset = {0};
1341-
qualset[(int) tag] = (1 << (int) GUMBO_NAMESPACE_HTML);
1342-
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(HTML) } );
1342+
return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(HTML) } );
13431343
}
13441344

13451345
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
13461346
static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
1347-
gumbo_tagset qualset = {0};
1348-
qualset[(int) tag] = (1 << (int) GUMBO_NAMESPACE_HTML);
1349-
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1347+
return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(APPLET),
13501348
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
13511349
TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
13521350
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
@@ -1383,19 +1381,17 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
13831381

13841382
// Like has_an_element_in_scope, but restricts the expected qualified name to a
13851383
// range of possible qualified names instead of just a single one.
1386-
static bool has_an_element_in_scope_with_tagname(GumboParser* parser, gumbo_tagset qualset) {
1387-
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1388-
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1389-
TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1390-
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1391-
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
1384+
static bool has_an_element_in_scope_with_tagname(GumboParser* parser, int expected_len, const GumboTag expected[]) {
1385+
return has_an_element_in_specific_scope(parser, expected_len, expected, false, (gumbo_tagset) {
1386+
TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1387+
TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1388+
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1389+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
13921390
}
13931391

13941392
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
13951393
static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
1396-
gumbo_tagset qualset = {0};
1397-
qualset[(int)tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1398-
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1394+
return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(APPLET),
13991395
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
14001396
TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
14011397
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
@@ -1405,9 +1401,7 @@ static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
14051401

14061402
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
14071403
static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
1408-
gumbo_tagset qualset = {0};
1409-
qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1410-
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1404+
return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(APPLET),
14111405
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
14121406
TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
14131407
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
@@ -1416,17 +1410,13 @@ static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
14161410

14171411
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
14181412
static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
1419-
gumbo_tagset qualset = {0};
1420-
qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1421-
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(HTML),
1413+
return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(HTML),
14221414
TAG(TABLE), TAG(TEMPLATE) });
14231415
}
14241416

14251417
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
14261418
static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
1427-
gumbo_tagset qualset = {0};
1428-
qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1429-
return has_an_element_in_specific_scope(parser, qualset, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
1419+
return has_an_element_in_specific_scope(parser, 1, &tag, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
14301420
}
14311421

14321422
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
@@ -2566,10 +2556,11 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
25662556
return false;
25672557
}
25682558
return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
2569-
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3),
2570-
TAG(H4), TAG(H5), TAG(H6) })) {
2571-
if (!has_an_element_in_scope_with_tagname(parser, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3), TAG(H4),
2572-
TAG(H5), TAG(H6) })) {
2559+
} else if (tag_in(token, kEndTag, (gumbo_tagset) {
2560+
TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6) })) {
2561+
if (!has_an_element_in_scope_with_tagname(parser, 6, (GumboTag[]) {
2562+
GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
2563+
GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
25732564
// No heading open; ignore the token entirely.
25742565
parser_add_parse_error(parser, token);
25752566
ignore_token(parser);

0 commit comments

Comments
 (0)