Skip to content

Commit 400ea14

Browse files
committed
Mozilla bug 1499682 - SIMD-accelerate the HTML tokenizer in the innerHTML case.
Differential Revision: https://phabricator.services.mozilla.com/D227317
1 parent c25f974 commit 400ea14

File tree

3 files changed

+153
-48
lines changed

3 files changed

+153
-48
lines changed

src/nu/validator/htmlparser/impl/Tokenizer.java

+121-47
Original file line numberDiff line numberDiff line change
@@ -1493,12 +1493,22 @@ public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
14931493
*/
14941494
// CPPONLY: if (mViewSource) {
14951495
// CPPONLY: mViewSource.SetBuffer(buffer);
1496-
// CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1496+
// CPPONLY: if (htmlaccel_enabled()) {
1497+
// CPPONLY: pos = StateLoopViewSourceSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1498+
// CPPONLY: } else {
1499+
// CPPONLY: pos = StateLoopViewSourceALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1500+
// CPPONLY: }
14971501
// CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
14981502
// CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) {
1499-
// CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1503+
// CPPONLY: if (htmlaccel_enabled()) {
1504+
// CPPONLY: pos = StateLoopLineColSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1505+
// CPPONLY: } else {
1506+
// CPPONLY: pos = StateLoopLineColALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1507+
// CPPONLY: }
1508+
// CPPONLY: } else if (htmlaccel_enabled() && ((buffer.getEnd() - pos) >= 32)) {
1509+
// CPPONLY: pos = StateLoopFastestSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
15001510
// CPPONLY: } else {
1501-
// CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1511+
// CPPONLY: pos = StateLoopFastestALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
15021512
// CPPONLY: }
15031513
// [NOCPP[
15041514
pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
@@ -1623,54 +1633,118 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
16231633
switch (state) {
16241634
case DATA:
16251635
dataloop: for (;;) {
1636+
// Ideally this reconsume block would be a separate state, DATA_RECONSUME above this one
1637+
// with fallthrough into this state. However, such a change would be disruptive to
1638+
// TransitionHandler and everything that works with returnState.
16261639
if (reconsume) {
16271640
reconsume = false;
1628-
} else {
1629-
if (++pos == endPos) {
1630-
break stateloop;
1641+
// This is a manual copy of the switch below with break/continue
1642+
// adjusted as relevant. Make sure to keep in sync with the switch below!
1643+
switch (c) {
1644+
case '&':
1645+
/*
1646+
* U+0026 AMPERSAND (&) Switch to the character
1647+
* reference in data state.
1648+
*/
1649+
flushChars(buf, pos);
1650+
assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
1651+
appendCharRefBuf(c);
1652+
setAdditionalAndRememberAmpersandLocation('\u0000');
1653+
returnState = state;
1654+
state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
1655+
continue stateloop;
1656+
case '<':
1657+
/*
1658+
* U+003C LESS-THAN SIGN (<) Switch to the tag
1659+
* open state.
1660+
*/
1661+
flushChars(buf, pos);
1662+
1663+
state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
1664+
// `break` optimizes; `continue stateloop;` would be valid
1665+
break dataloop;
1666+
case '\u0000':
1667+
maybeEmitReplacementCharacter(buf, pos);
1668+
break;
1669+
case '\r':
1670+
emitCarriageReturn(buf, pos);
1671+
break stateloop;
1672+
case '\n':
1673+
silentLineFeed();
1674+
// CPPONLY: MOZ_FALLTHROUGH;
1675+
default:
1676+
/*
1677+
* Anything else Emit the input character as a
1678+
* character token.
1679+
*
1680+
* Stay in the data state.
1681+
*/
1682+
break;
16311683
}
1632-
c = checkChar(buf, pos);
16331684
}
1634-
switch (c) {
1635-
case '&':
1636-
/*
1637-
* U+0026 AMPERSAND (&) Switch to the character
1638-
* reference in data state.
1639-
*/
1640-
flushChars(buf, pos);
1641-
assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
1642-
appendCharRefBuf(c);
1643-
setAdditionalAndRememberAmpersandLocation('\u0000');
1644-
returnState = state;
1645-
state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
1646-
continue stateloop;
1647-
case '<':
1648-
/*
1649-
* U+003C LESS-THAN SIGN (<) Switch to the tag
1650-
* open state.
1651-
*/
1652-
flushChars(buf, pos);
1653-
1654-
state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
1655-
// `break` optimizes; `continue stateloop;` would be valid
1656-
break dataloop;
1657-
case '\u0000':
1658-
maybeEmitReplacementCharacter(buf, pos);
1659-
continue;
1660-
case '\r':
1661-
emitCarriageReturn(buf, pos);
1662-
break stateloop;
1663-
case '\n':
1664-
silentLineFeed();
1665-
// CPPONLY: MOZ_FALLTHROUGH;
1666-
default:
1667-
/*
1668-
* Anything else Emit the input character as a
1669-
* character token.
1670-
*
1671-
* Stay in the data state.
1672-
*/
1673-
continue;
1685+
datamiddle: for (;;) {
1686+
++pos;
1687+
// Perhaps at some point, it will be appropriate to do SIMD in Java, but not today.
1688+
// The line below advances pos by some number of code units that this state is indifferent to.
1689+
// CPPONLY: pos += accelerateData(buf, pos, endPos);
1690+
for (;;) {
1691+
if (pos == endPos) {
1692+
break stateloop;
1693+
}
1694+
c = checkChar(buf, pos);
1695+
// Make sure to keep in sync with the switch above in the reconsume block!
1696+
switch (c) {
1697+
case '&':
1698+
/*
1699+
* U+0026 AMPERSAND (&) Switch to the character
1700+
* reference in data state.
1701+
*/
1702+
flushChars(buf, pos);
1703+
assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
1704+
appendCharRefBuf(c);
1705+
setAdditionalAndRememberAmpersandLocation('\u0000');
1706+
returnState = state;
1707+
state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
1708+
continue stateloop;
1709+
case '<':
1710+
/*
1711+
* U+003C LESS-THAN SIGN (<) Switch to the tag
1712+
* open state.
1713+
*/
1714+
flushChars(buf, pos);
1715+
1716+
state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
1717+
// `break` optimizes; `continue stateloop;` would be valid
1718+
break dataloop;
1719+
case '\u0000':
1720+
maybeEmitReplacementCharacter(buf, pos);
1721+
// Climb back to the SIMD path.
1722+
continue datamiddle;
1723+
case '\r':
1724+
emitCarriageReturn(buf, pos);
1725+
break stateloop;
1726+
case '\n':
1727+
silentLineFeed();
1728+
// Climb back to the SIMD path.
1729+
continue datamiddle;
1730+
default:
1731+
/*
1732+
* Anything else Emit the input character as a
1733+
* character token.
1734+
*
1735+
* Stay in the data state.
1736+
*/
1737+
// Don't go back to SIMD. We have less than a SIMD
1738+
// stride to go if we come here in the SIMD case with
1739+
// the fastest loop policy. With other policies, we
1740+
// can come here due to a non-BMP character, in which
1741+
// case we stay on the ALU path until the end of the
1742+
// line.
1743+
// We need to increment pos!
1744+
++pos;
1745+
continue;
1746+
}
1747+
}
16741748
}
16751749
}
16761750
// CPPONLY: MOZ_FALLTHROUGH;

translator-src/nu/validator/htmlparser/cpptranslate/CppTypes.java

+9-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public class CppTypes {
9696
"nsHtml5ArrayCopy", "nsHtml5AtomTable", "nsHtml5DocumentMode",
9797
"nsHtml5Highlighter", "nsHtml5Macros", "nsHtml5NamedCharacters",
9898
"nsHtml5NamedCharactersAccel", "nsHtml5String",
99-
"nsIContent", "nsTraceRefcnt" };
99+
"nsIContent", "nsTraceRefcnt", "mozilla/parser/htmlaccel_enabled" };
100100

101101
private static final String[] STACK_NODE_INCLUDES = { "nsAtom", "nsHtml5AtomTable",
102102
"nsHtml5HtmlAttributes", "nsHtml5String", "nsNameSpaceManager", "nsIContent",
@@ -486,6 +486,10 @@ public String checkChar() {
486486
return "P::checkChar";
487487
}
488488

489+
/**
 * Returns the C++ callee text that replaces a Java {@code accelerateData(...)}
 * call in translated output. {@code CppVisitor.visitAccelerateData} prints
 * this string immediately before the opening parenthesis of the call.
 *
 * @return the constant {@code "P::accelerateData"}
 */
public String accelerateData() {
490+
return "P::accelerateData";
491+
}
492+
489493
/**
 * Returns the C++ callee text that replaces a Java {@code silentLineFeed()}
 * call in translated output. {@code CppVisitor.visitSilentLineFeed} prints
 * this string followed by {@code (this)}.
 *
 * @return the constant {@code "P::silentLineFeed"}
 */
public String silentLineFeed() {
490494
return "P::silentLineFeed";
491495
}
@@ -541,4 +545,8 @@ public String crashMacro() {
541545
/**
 * Returns the constant {@code "nsHtml5TokenizerLoopPolicies"}.
 *
 * NOTE(review): presumably the name of the generated-code include that
 * declares the tokenizer loop policy classes; the caller is not visible
 * here, so confirm against its use in the translator.
 *
 * @return the constant {@code "nsHtml5TokenizerLoopPolicies"}
 */
public String loopPolicyInclude() {
542546
return "nsHtml5TokenizerLoopPolicies";
543547
}
548+
549+
/**
 * Returns the C++ macro text used to mark a generated method as always
 * inlined. {@code CppVisitor.printMethodDeclaration} prints it after
 * {@code template<class P>} when the method being declared is named
 * {@code stateLoop}.
 *
 * @return the constant {@code "MOZ_ALWAYS_INLINE_EVEN_DEBUG"}
 */
public String alwaysInline() {
550+
return "MOZ_ALWAYS_INLINE_EVEN_DEBUG";
551+
}
544552
}

translator-src/nu/validator/htmlparser/cpptranslate/CppVisitor.java

+23
Original file line numberDiff line numberDiff line change
@@ -1320,6 +1320,9 @@ public void visit(MethodCallExpr n, LocalSymbolTable arg) {
13201320
} else if ("checkChar".equals(n.getName())
13211321
&& n.getScope() == null) {
13221322
visitCheckChar(n, arg);
1323+
} else if ("accelerateData".equals(n.getName())
1324+
&& n.getScope() == null) {
1325+
visitAccelerateData(n, arg);
13231326
} else if ("silentCarriageReturn".equals(n.getName())
13241327
&& n.getScope() == null) {
13251328
visitSilentCarriageReturn(n, arg);
@@ -1650,6 +1653,10 @@ protected void printMethodDeclaration(MethodDeclaration n,
16501653
&& "Tokenizer".equals(javaClassName)
16511654
&& cppTypes.stateLoopPolicies().length > 0) {
16521655
printer.print("template<class P>");
1656+
if ("stateLoop".equals(n.getName())) {
1657+
printer.print(" ");
1658+
printer.print(cppTypes.alwaysInline());
1659+
}
16531660
if (inHeader()) {
16541661
printer.print(" ");
16551662
} else {
@@ -1956,6 +1963,22 @@ private void visitCheckChar(MethodCallExpr call, LocalSymbolTable arg) {
19561963
printer.print(")");
19571964
}
19581965

1966+
private void visitAccelerateData(MethodCallExpr call, LocalSymbolTable arg) {
1967+
List<Expression> args = call.getArgs();
1968+
printer.print(cppTypes.accelerateData());
1969+
printer.print("(this, ");
1970+
if (call.getArgs() != null) {
1971+
for (Iterator<Expression> i = call.getArgs().iterator(); i.hasNext();) {
1972+
Expression e = i.next();
1973+
e.accept(this, arg);
1974+
if (i.hasNext()) {
1975+
printer.print(", ");
1976+
}
1977+
}
1978+
}
1979+
printer.print(")");
1980+
}
1981+
19591982
private void visitSilentLineFeed(MethodCallExpr call, LocalSymbolTable arg) {
19601983
printer.print(cppTypes.silentLineFeed());
19611984
printer.print("(this)");

0 commit comments

Comments
 (0)