Skip to content

Commit 61b15f1

Browse files
committed
WIP: HTML API: Stop at funky comments
1 parent d3286f8 commit 61b15f1

File tree

2 files changed

+255
-12
lines changed

2 files changed

+255
-12
lines changed

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 158 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,16 @@ class WP_HTML_Tag_Processor {
317317
*/
318318
private $stop_on_tag_closers;
319319

320+
/**
321+
* Whether to visit funky comments, e.g. </1>, when walking an input document.
322+
*
323+
* These are "funky" because they are parse errors: HTML treats a closing tag whose name does not start with a letter as a bogus comment.
324+
*
325+
* @since 6.3.0
326+
* @var bool
327+
*/
328+
private $stop_on_funky_comments;
329+
320330
/**
321331
* Holds updated HTML as updates are applied.
322332
*
@@ -538,6 +548,18 @@ class WP_HTML_Tag_Processor {
538548
*/
539549
protected $seek_count = 0;
540550

551+
/**
552+
* @since 6.3.0
553+
* @var int[]|null Start and end byte offsets of the funky comment text, or null when not stopped at one.
554+
*/
555+
private $funky_comment_content = null;
556+
557+
/**
558+
* @since 6.3.0
559+
* @var int
560+
*/
561+
private $placeholders = 0;
562+
541563
/**
542564
* Constructor.
543565
*
@@ -1161,11 +1183,21 @@ private function parse_next_tag() {
11611183
* See https://github.com/WordPress/wordpress-develop/pull/4256
11621184
*/
11631185
if ( $this->is_closing_tag ) {
1164-
$closer_at = strpos( $html, '>', $at + 3 );
1186+
$closer_at = strpos( $html, '>', $at );
11651187
if ( false === $closer_at ) {
11661188
return false;
11671189
}
11681190

1191+
if ( $this->stop_on_funky_comments ) {
1192+
++$at;
1193+
$this->tag_name_length = 0;
1194+
$this->tag_name_starts_at = $at;
1195+
$this->bytes_already_parsed = $closer_at;
1196+
$this->funky_comment_content = array( $at, $closer_at );
1197+
1198+
return true;
1199+
}
1200+
11691201
$at = $closer_at + 1;
11701202
continue;
11711203
}
@@ -1301,11 +1333,12 @@ private function skip_whitespace() {
13011333
private function after_tag() {
13021334
$this->class_name_updates_to_attributes_updates();
13031335
$this->apply_attributes_updates();
1304-
$this->tag_name_starts_at = null;
1305-
$this->tag_name_length = null;
1306-
$this->tag_ends_at = null;
1307-
$this->is_closing_tag = null;
1308-
$this->attributes = array();
1336+
$this->tag_name_starts_at = null;
1337+
$this->tag_name_length = null;
1338+
$this->tag_ends_at = null;
1339+
$this->is_closing_tag = null;
1340+
$this->attributes = array();
1341+
$this->funky_comment_content = null;
13091342
}
13101343

13111344
/**
@@ -1552,7 +1585,7 @@ public function seek( $bookmark_name ) {
15521585
if ( ! array_key_exists( $bookmark_name, $this->bookmarks ) ) {
15531586
_doing_it_wrong(
15541587
__METHOD__,
1555-
__( 'Unknown bookmark name.' ),
1588+
__( 'Unknown bookmark name.' . ' ' . $bookmark_name ),
15561589
'6.2.0'
15571590
);
15581591
return false;
@@ -1577,6 +1610,14 @@ public function seek( $bookmark_name ) {
15771610
return $this->next_tag( array( 'tag_closers' => 'visit' ) );
15781611
}
15791612

1613+
/**
 * Moves the processor back to the start of the document.
 *
 * Finishes handling of the currently matched tag via `after_tag()`, then
 * resets the parse cursor, the copy cursor, and the output buffer so the
 * next call to `next_tag()` starts scanning from byte zero.
 *
 * NOTE(review): anything already written to the output buffer is dropped
 * here; confirm whether pending updates should be flushed into the
 * document before rewinding.
 *
 * @since 6.3.0
 */
public function rewind() {
	// Close out the current tag first; this also clears the per-tag state.
	$this->after_tag();

	// Start over: nothing copied, nothing parsed.
	$this->output_buffer        = '';
	$this->bytes_already_copied = 0;
	$this->bytes_already_parsed = 0;
}
1620+
15801621
/**
15811622
* Compare two WP_HTML_Text_Replacement objects.
15821623
*
@@ -1857,6 +1898,13 @@ public function is_tag_closer() {
18571898
return $this->is_closing_tag;
18581899
}
18591900

1901+
/**
 * Indicates whether the processor is currently stopped at a funky comment,
 * e.g. `</1>`.
 *
 * @since 6.3.0
 *
 * @return bool True when a funky comment span is recorded for the current match.
 */
public function is_funky_comment() {
	// The span is only populated while stopped at a funky comment.
	return isset( $this->funky_comment_content );
}
1907+
18601908
/**
18611909
* Updates or creates a new attribute on the currently matched tag with the passed value.
18621910
*
@@ -2113,6 +2161,13 @@ public function __toString() {
21132161
return $this->get_updated_html();
21142162
}
21152163

2164+
/**
 * Returns the text inside the currently matched funky comment.
 *
 * For `</%day>` this is `%day`: everything between the `</` opener
 * and the closing `>`.
 *
 * @since 6.3.0
 *
 * @return string|null The funky comment text, or null when the processor
 *                     is not stopped at a funky comment.
 */
public function get_funky_content() {
	if ( null === $this->funky_comment_content ) {
		return null;
	}

	// The span is stored as a pair of byte offsets: [ start, end ).
	$starts_at = $this->funky_comment_content[0];
	$ends_at   = $this->funky_comment_content[1];

	return substr( $this->html, $starts_at, $ends_at - $starts_at );
}
2170+
21162171
/**
21172172
* Returns the string representation of the HTML Tag Processor.
21182173
*
@@ -2204,11 +2259,12 @@ private function parse_query( $query ) {
22042259
return;
22052260
}
22062261

2207-
$this->last_query = $query;
2208-
$this->sought_tag_name = null;
2209-
$this->sought_class_name = null;
2210-
$this->sought_match_offset = 1;
2211-
$this->stop_on_tag_closers = false;
2262+
$this->last_query = $query;
2263+
$this->sought_tag_name = null;
2264+
$this->sought_class_name = null;
2265+
$this->sought_match_offset = 1;
2266+
$this->stop_on_tag_closers = false;
2267+
$this->stop_on_funky_comments = false;
22122268

22132269
// A single string value means "find the tag of this name".
22142270
if ( is_string( $query ) ) {
@@ -2246,8 +2302,94 @@ private function parse_query( $query ) {
22462302
if ( isset( $query['tag_closers'] ) ) {
22472303
$this->stop_on_tag_closers = 'visit' === $query['tag_closers'];
22482304
}
2305+
2306+
if ( isset( $query['funky_comments'] ) ) {
2307+
$this->stop_on_funky_comments = 'visit' === $query['funky_comments'];
2308+
}
22492309
}
22502310

2311+
/**
 * Advances to the next run of consecutive tokens that matches an HTML pattern.
 *
 * The pattern is itself HTML. Each tag in the pattern must be met by a token
 * in this document with the same tag name and the same opener/closer kind,
 * and every attribute present on the pattern tag must be present on the
 * document tag with an identical value. Extra attributes on the document
 * tag are ignored. A funky comment in the pattern, e.g. `</1>`, is a
 * wildcard that matches any single token; the matched token is bookmarked
 * as `__placeholder_N`, numbered from one in the order matched.
 *
 * On success with a multi-token pattern the processor is left at the first
 * matched token and the last matched token is bookmarked as `match_end`.
 * With a single-token pattern the processor is left at the matched token
 * and no `match_end` bookmark is set.
 *
 * Placeholder bookmarks from a previous call are released when this method
 * is called again.
 *
 * @since 6.3.0
 *
 * @param string $pattern_html HTML describing the sequence of tokens to find.
 * @return bool Whether a matching sequence was found.
 */
public function declarative_match( $pattern_html ) {
	$pattern          = new WP_HTML_Tag_Processor( $pattern_html );
	$visit_everything = array(
		'tag_closers'    => 'visit',
		'funky_comments' => 'visit',
	);

	// Compares the current pattern token against the current document token.
	$same_thing = function ( WP_HTML_Tag_Processor $pattern, WP_HTML_Tag_Processor $test ) {
		// A funky comment in the pattern is a wildcard: remember what it matched.
		if ( $pattern->is_funky_comment() ) {
			$this->placeholders++;
			$this->set_bookmark( "__placeholder_{$this->placeholders}" );
			return true;
		}

		if ( ! (
			$pattern->get_tag() === $test->get_tag() &&
			$pattern->is_tag_closer() === $test->is_tag_closer() &&
			$pattern->is_funky_comment() === $test->is_funky_comment()
		) ) {
			return false;
		}

		$attribute_constraints = $pattern->get_attribute_names_with_prefix( '' );
		if ( null === $attribute_constraints ) {
			return true;
		}

		foreach ( $attribute_constraints as $name ) {
			if ( $pattern->get_attribute( $name ) !== $test->get_attribute( $name ) ) {
				return false;
			}
		}

		return true;
	};

	// Each pass through this loop tries one candidate starting position.
	while ( true ) {
		/*
		 * Release placeholder bookmarks left behind by a previous call or by
		 * the previous failed candidate. The counter must not be reset before
		 * this loop, otherwise the stale bookmarks are never released.
		 */
		while ( $this->placeholders > 0 ) {
			$this->release_bookmark( "__placeholder_{$this->placeholders}" );
			$this->placeholders--;
		}

		// Step one: find the next spot where the pattern and document start the same.
		if ( ! $pattern->next_tag( $visit_everything ) ) {
			return false;
		}

		$found_start = false;
		while ( $this->next_tag( $visit_everything ) ) {
			if ( $same_thing( $pattern, $this ) ) {
				$found_start = true;
				break;
			}
		}

		if ( ! $found_start ) {
			return false;
		}

		// Step two: check that the subsequent tokens in the pattern and document match.
		$this->set_bookmark( 'match_start' );
		if ( ! $pattern->next_tag( $visit_everything ) ) {
			// Single-token pattern: already matched.
			$this->release_bookmark( 'match_start' );
			return true;
		}

		$is_match = true;
		while ( true ) {
			if ( ! $this->next_tag( $visit_everything ) ) {
				// The document ended before the pattern did.
				$this->release_bookmark( 'match_start' );
				return false;
			}

			if ( ! $same_thing( $pattern, $this ) ) {
				$is_match = false;
				break;
			}

			if ( ! $pattern->next_tag( $visit_everything ) ) {
				break;
			}
		}

		if ( $is_match ) {
			$this->set_bookmark( 'match_end' );
			$this->seek( 'match_start' );
			$this->release_bookmark( 'match_start' );
			return true;
		}

		/*
		 * The candidate failed part-way through. Go back to its first token so
		 * the next scan resumes with the token right after it; continuing from
		 * the current position would skip candidates that overlap the failed
		 * one, e.g. `<li><img></li>` inside `<li><li><img></li>`.
		 */
		$this->seek( 'match_start' );
		$this->release_bookmark( 'match_start' );
		$pattern->rewind();
	}
}
22512393

22522394
/**
22532395
* Checks whether a given tag and its attributes match the search criteria.
@@ -2257,6 +2399,10 @@ private function parse_query( $query ) {
22572399
* @return boolean Whether the given tag and its attribute match the search criteria.
22582400
*/
22592401
private function matches() {
2402+
if ( null !== $this->funky_comment_content && $this->stop_on_funky_comments ) {
2403+
return true;
2404+
}
2405+
22602406
if ( $this->is_closing_tag && ! $this->stop_on_tag_closers ) {
22612407
return false;
22622408
}

tests/phpunit/tests/html-api/wpHtmlTagProcessor.php

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ class WP_UnitTestCase extends PHPUnit\Framework\TestCase {}
1717
// require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-processor.php';
1818

1919
function esc_attr( $s ) { return str_replace( [ '<', '>', '"' ], [ '&lt;', '&gt;', '&quot;' ], $s ); }
20+
function __( $s ) { return $s; }
21+
function _doing_it_wrong( ...$args ) {
22+
var_dump( $args );
23+
}
2024
}
2125

2226
/**
@@ -2247,4 +2251,97 @@ public function data_updating_attributes_in_malformed_html() {
22472251
),
22482252
);
22492253
}
2254+
2255+
/**
 * Ensures the processor stops at a funky comment when asked to visit them
 * and exposes the text inside it.
 *
 * @dataProvider data_funky_comments
 *
 * @param string $html    Input document containing a funky comment.
 * @param string $content Expected text of the funky comment.
 */
public function test_stops_at_funky_comments( $html, $content ) {
	$processor = new WP_HTML_Tag_Processor( $html );
	$query     = array( 'funky_comments' => 'visit' );

	$this->assertTrue( $processor->next_tag( $query ) );
	$this->assertEquals( $content, $processor->get_funky_content() );
}
2264+
2265+
/**
 * Data provider.
 *
 * @return array[] Each dataset is { input HTML, expected funky comment text }.
 */
public function data_funky_comments() {
	$datasets = array();

	$datasets['Isolated comment']   = array( '</1>', '1' );
	$datasets['Inside text']        = array( 'Before</1>After', '1' );
	$datasets['%name syntax']       = array( 'Today is </%day>.', '%day' );
	$datasets['With spaces inside'] = array( 'What </$variable is this>?', '$variable is this' );

	return $datasets;
}
2273+
2274+
/**
2275+
* @dataProvider data_declarative_patterns
2276+
*/
2277+
public function test_matches_declarative_pattern( $pattern, $html, $matches ) {
2278+
$p = new WP_HTML_Tag_Processor( $html );
2279+
2280+
if ( $matches ) {
2281+
$this->assertTrue( $p->declarative_match( $pattern ) );
2282+
} else {
2283+
$this->assertFalse( $p->declarative_match( $pattern ) );
2284+
}
2285+
}
2286+
2287+
/**
 * Data provider.
 *
 * Dataset names starting with `^` are negative cases.
 *
 * @return array[] Each dataset is { pattern HTML, document HTML, expected match result }.
 */
public function data_declarative_patterns() {
	return array(
		'Single tag'                 => array(
			'<div>',
			'<div>',
			true,
		),
		'^Single tag'                => array(
			'<div>',
			'<img>',
			false,
		),
		'Wrapped image'              => array(
			'<div><img></div>',
			'<div><img></div>',
			true,
		),
		'Wrapped image w/attributes' => array(
			'<div><img></div>',
			'<div id="14"><img src="hallumi" inert></div>',
			true,
		),
		'Prefix before match'        => array(
			'<li><img></li>',
			'<main><h1>Stuff!</h1><ul><li><img></li></ul></main>',
			true,
		),
		'Pattern with attribute'     => array(
			'<li is-active><img></li>',
			'<li is-active><img></li>',
			true,
		),
		'^Pattern with attribute'    => array(
			'<li is-active><img></li>',
			'<li><img></li>',
			false,
		),
		'Pattern with attributes'    => array(
			'<li is-active class="slick"><img></li>',
			'<li class="slick" is-active><img></li>',
			true,
		),
		'^Pattern with attributes'   => array(
			'<li is-active class="slick"><img></li>',
			'<li id="slick" is-active><img></li>',
			false,
		),
		'^Pattern with attributes 2' => array(
			'<li is-active class="slick"><img></li>',
			'<li class="wicket" is-active><img></li>',
			false,
		),
		'Test with attributes'       => array(
			'<li is-active><img></li>',
			'<li id="5" is-funky=maybe style=\'color: red;\' is-active class="test-class bright"><img></li>',
			true,
		),
		'^Test with attributes'      => array(
			'<li is-active><img></li>',
			'<li id="5" is-funky=maybe style=\'color: red;\' isactive class="test-class bright"><img></li>',
			false,
		),
		'Attribute with value'       => array(
			'<input disabled>',
			'<input type="text"><input><input disabled><input value="5">',
			true,
		),
		'Attribute with text'        => array(
			'<input id="5">',
			'<input type="text"><input><input id=5><input disabled><input value="5">',
			true,
		),
		'^Attribute with value'      => array(
			'<input disabled>',
			'<input type="text"><input><input disable><input value="5">',
			false,
		),
		'Wildcard'                   => array(
			'<hgroup></1></2></hgroup>',
			'<hgroup><h1>Important</h1></hgroup>',
			true,
		),
		'^Wildcard'                  => array(
			'<hgroup></1></2></hgroup>',
			'<hgroup><img></hgroup>',
			false,
		),
		'Wildcard attributes'        => array(
			'</1 aria-label="placeholder">',
			'<div><p><strong>This</strong> is <em aria-label="placeholder">really</em> cool!</p></div>',
			true,
		),
	);
}
2309+
2310+
/**
 * Ensures that after a successful match the processor is positioned at the
 * first token of the match, so its attributes can be read.
 */
public function test_declarative_match_pauses_at_start_of_match() {
	$html      = '<main><h1>Stuff!</h1><ul><li pick-me><img></li></ul></main>';
	$processor = new WP_HTML_Tag_Processor( $html );

	$was_matched = $processor->declarative_match( '<li><img></li>' );
	$this->assertTrue( $was_matched );

	// The matched LI carries the boolean `pick-me` attribute.
	$this->assertTrue( $processor->get_attribute( 'pick-me' ) );
}
2316+
2317+
/**
 * Ensures that wildcard tokens in a pattern are bookmarked as
 * `__placeholder_N` at the document tokens they matched.
 *
 * This exercises an internal detail and exists to aid development.
 */
public function test_declarative_match_bookmarks_markup_wildcards_delete_me_this_is_an_internal_detail_but_for_now_helpful_for_development() {
	$html = <<<HTML
<main>
<h1>Stuff!</h1>
<ul>
<li id=1><p>Just a thought</p></li>
<img>
<li id=2 pick-me><img></li>
</ul>
</main>
HTML;

	$processor = new WP_HTML_Tag_Processor( $html );
	$processor->next_tag();

	// The first LI holds more than one token, so only the second LI fits the pattern.
	$this->assertTrue( $processor->declarative_match( '<li></1></li>' ) );
	$processor->seek( '__placeholder_1' );
	$this->assertSame( 'IMG', $processor->get_tag() );

	// Start over and match two consecutive wildcards.
	$processor->rewind();
	$this->assertTrue( $processor->declarative_match( '<main></1></2><ul>' ) );

	// First wildcard: the H1 opener.
	$processor->seek( '__placeholder_1' );
	$this->assertSame( 'H1', $processor->get_tag() );
	$this->assertFalse( $processor->is_tag_closer() );

	// Second wildcard: the H1 closer.
	$processor->seek( '__placeholder_2' );
	$this->assertSame( 'H1', $processor->get_tag() );
	$this->assertTrue( $processor->is_tag_closer() );
}
22502347
}

0 commit comments

Comments
 (0)