@@ -1493,12 +1493,22 @@ public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
1493
1493
*/
1494
1494
// CPPONLY: if (mViewSource) {
1495
1495
// CPPONLY: mViewSource.SetBuffer(buffer);
1496
- // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1496
+ // CPPONLY: if (htmlaccel_enabled()) {
1497
+ // CPPONLY: pos = StateLoopViewSourceSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1498
+ // CPPONLY: } else {
1499
+ // CPPONLY: pos = StateLoopViewSourceALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1500
+ // CPPONLY: }
1497
1501
// CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
1498
1502
// CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) {
1499
- // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1503
+ // CPPONLY: if (htmlaccel_enabled()) {
1504
+ // CPPONLY: pos = StateLoopLineColSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1505
+ // CPPONLY: } else {
1506
+ // CPPONLY: pos = StateLoopLineColALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1507
+ // CPPONLY: }
1508
+ // CPPONLY: } else if (htmlaccel_enabled() && ((buffer.getEnd() - pos) >= 32)) {
1509
+ // CPPONLY: pos = StateLoopFastestSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1500
1510
// CPPONLY: } else {
1501
- // CPPONLY: pos = stateLoop (state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1511
+ // CPPONLY: pos = StateLoopFastestALU (state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1502
1512
// CPPONLY: }
1503
1513
// [NOCPP[
1504
1514
pos = stateLoop (state , c , pos , buffer .getBuffer (), false , returnState ,
@@ -1623,54 +1633,118 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
1623
1633
switch (state ) {
1624
1634
case DATA :
1625
1635
dataloop : for (;;) {
1636
+ // Ideally this reconsume block would be a separate state, DATA_RECONSUME above this one
1637
+ // with fallthrough into this state. However, such a change would be disruptive to
1638
+ // TransitionHandler and everything that works with returnState.
1626
1639
if (reconsume ) {
1627
1640
reconsume = false ;
1628
- } else {
1629
- if (++pos == endPos ) {
1630
- break stateloop ;
1641
+ // This is a manual copy of the switch below with break/continue
1642
+ // adjusted as relevant. Make sure to keep in sync with the switch below!
1643
+ switch (c ) {
1644
+ case '&' :
1645
+ /*
1646
+ * U+0026 AMPERSAND (&) Switch to the character
1647
+ * reference in data state.
1648
+ */
1649
+ flushChars (buf , pos );
1650
+ assert charRefBufLen == 0 : "charRefBufLen not reset after previous use!" ;
1651
+ appendCharRefBuf (c );
1652
+ setAdditionalAndRememberAmpersandLocation ('\u0000' );
1653
+ returnState = state ;
1654
+ state = transition (state , Tokenizer .CONSUME_CHARACTER_REFERENCE , reconsume , pos );
1655
+ continue stateloop ;
1656
+ case '<' :
1657
+ /*
1658
+ * U+003C LESS-THAN SIGN (<) Switch to the tag
1659
+ * open state.
1660
+ */
1661
+ flushChars (buf , pos );
1662
+
1663
+ state = transition (state , Tokenizer .TAG_OPEN , reconsume , pos );
1664
+ // `break` optimizes; `continue stateloop;` would be valid
1665
+ break dataloop ;
1666
+ case '\u0000' :
1667
+ maybeEmitReplacementCharacter (buf , pos );
1668
+ break ;
1669
+ case '\r' :
1670
+ emitCarriageReturn (buf , pos );
1671
+ break stateloop ;
1672
+ case '\n' :
1673
+ silentLineFeed ();
1674
+ // CPPONLY: MOZ_FALLTHROUGH;
1675
+ default :
1676
+ /*
1677
+ * Anything else Emit the input character as a
1678
+ * character token.
1679
+ *
1680
+ * Stay in the data state.
1681
+ */
1682
+ break ;
1631
1683
}
1632
- c = checkChar (buf , pos );
1633
1684
}
1634
- switch (c ) {
1635
- case '&' :
1636
- /*
1637
- * U+0026 AMPERSAND (&) Switch to the character
1638
- * reference in data state.
1639
- */
1640
- flushChars (buf , pos );
1641
- assert charRefBufLen == 0 : "charRefBufLen not reset after previous use!" ;
1642
- appendCharRefBuf (c );
1643
- setAdditionalAndRememberAmpersandLocation ('\u0000' );
1644
- returnState = state ;
1645
- state = transition (state , Tokenizer .CONSUME_CHARACTER_REFERENCE , reconsume , pos );
1646
- continue stateloop ;
1647
- case '<' :
1648
- /*
1649
- * U+003C LESS-THAN SIGN (<) Switch to the tag
1650
- * open state.
1651
- */
1652
- flushChars (buf , pos );
1653
-
1654
- state = transition (state , Tokenizer .TAG_OPEN , reconsume , pos );
1655
- // `break` optimizes; `continue stateloop;` would be valid
1656
- break dataloop ;
1657
- case '\u0000' :
1658
- maybeEmitReplacementCharacter (buf , pos );
1659
- continue ;
1660
- case '\r' :
1661
- emitCarriageReturn (buf , pos );
1662
- break stateloop ;
1663
- case '\n' :
1664
- silentLineFeed ();
1665
- // CPPONLY: MOZ_FALLTHROUGH;
1666
- default :
1667
- /*
1668
- * Anything else Emit the input character as a
1669
- * character token.
1670
- *
1671
- * Stay in the data state.
1672
- */
1673
- continue ;
1685
+ datamiddle : for (;;) {
1686
+ ++pos ;
1687
+ // Perhaps at some point, it will be appropriate to do SIMD in Java, but not today.
1688
+ // The line below advances pos by some number of code units that this state is indifferent to.
1689
+ // CPPONLY: pos += accelerateData(buf, pos, endPos);
1690
+ for (;;) {
1691
+ if (pos == endPos ) {
1692
+ break stateloop ;
1693
+ }
1694
+ c = checkChar (buf , pos );
1695
+ // Make sure to keep in sync with the switch above in the reconsume block!
1696
+ switch (c ) {
1697
+ case '&' :
1698
+ /*
1699
+ * U+0026 AMPERSAND (&) Switch to the character
1700
+ * reference in data state.
1701
+ */
1702
+ flushChars (buf , pos );
1703
+ assert charRefBufLen == 0 : "charRefBufLen not reset after previous use!" ;
1704
+ appendCharRefBuf (c );
1705
+ setAdditionalAndRememberAmpersandLocation ('\u0000' );
1706
+ returnState = state ;
1707
+ state = transition (state , Tokenizer .CONSUME_CHARACTER_REFERENCE , reconsume , pos );
1708
+ continue stateloop ;
1709
+ case '<' :
1710
+ /*
1711
+ * U+003C LESS-THAN SIGN (<) Switch to the tag
1712
+ * open state.
1713
+ */
1714
+ flushChars (buf , pos );
1715
+
1716
+ state = transition (state , Tokenizer .TAG_OPEN , reconsume , pos );
1717
+ // `break` optimizes; `continue stateloop;` would be valid
1718
+ break dataloop ;
1719
+ case '\u0000' :
1720
+ maybeEmitReplacementCharacter (buf , pos );
1721
+ // Climb back to the SIMD path.
1722
+ continue datamiddle ;
1723
+ case '\r' :
1724
+ emitCarriageReturn (buf , pos );
1725
+ break stateloop ;
1726
+ case '\n' :
1727
+ silentLineFeed ();
1728
+ // Climb back to the SIMD path.
1729
+ continue datamiddle ;
1730
+ default :
1731
+ /*
1732
+ * Anything else Emit the input character as a
1733
+ * character token.
1734
+ *
1735
+ * Stay in the data state.
1736
+ */
1737
+ // Don't go back to SIMD. We have less than a SIMD
1738
+ // stride to go if we come here in the SIMD case with
1739
+ // the fastest loop policy. With other policies, we
1740
+ // can come here due to a non-BMP character, in which
1741
+ // case we stay on the ALU path until the end of the
1742
+ // line.
1743
+ // We need to increment pos!
1744
+ ++pos ;
1745
+ continue ;
1746
+ }
1747
+ }
1674
1748
}
1675
1749
}
1676
1750
// CPPONLY: MOZ_FALLTHROUGH;
0 commit comments