Skip to content

Commit a8dd424

Browse files
committed
HTML API: Use Tag Processor when adding rel keywords to A elements.
Prep work for WordPress#9248. See also WordPress#9251.
1 parent 0fd771a commit a8dd424

File tree

3 files changed

+137
-32
lines changed

3 files changed

+137
-32
lines changed

src/wp-includes/formatting.php

Lines changed: 72 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3220,34 +3220,81 @@ function _split_str_by_whitespace( $text, $goal ) {
32203220
* @return string HTML A element with the added rel attribute.
32213221
*/
32223222
function wp_rel_callback( $matches, $rel ) {
	_deprecated_function(
		__FUNCTION__,
		'{WP_VERSION}',
		'wp_include_in_all_a_rel()'
	);

	// `$matches[0]` is the full text of the regex match, which is handed over as the HTML to update.
	return wp_include_in_all_a_rel( $matches[0], $rel );
}

/**
 * Ensures that every A element in the given HTML carries each of the
 * provided `rel` keywords, without duplicating any keyword.
 *
 * Keywords already present on an element are kept and listed first; the
 * provided keywords that are still missing are appended after them.
 *
 * The `nofollow` keyword is never added to a link whose `href` is reported
 * as internal by wp_is_internal_link(). A `nofollow` that such a link already
 * carries is left untouched; only the addition is skipped.
 *
 * Example:
 *
 *     '<a rel="nofollow">'          === wp_include_in_all_a_rel( '<a>', 'nofollow' );
 *     '<a rel="nofollow">'          === wp_include_in_all_a_rel( '<a rel="nofollow">', 'nofollow' );
 *     '<a rel="pingback nofollow">' === wp_include_in_all_a_rel( '<a rel="pingback">', 'nofollow' );
 *     '<a rel="a b c">'             === wp_include_in_all_a_rel( '<a rel="a a a">', 'a a a b b c' );
 *
 * @since {WP_VERSION}
 *
 * @param string $html                         Add the given `rel` keywords to every `A` tag in this HTML.
 * @param string $space_separated_rel_keywords Each of these keywords will be present in the final HTML.
 * @return string Modified HTML with all `A` tags containing the given `rel` keywords.
 */
function wp_include_in_all_a_rel( $html, $space_separated_rel_keywords ) {
	/*
	 * `empty()` considers the string "0" to be empty, yet "0" is a valid
	 * keyword token, so it must not trigger the early return.
	 */
	$has_no_keywords = empty( $space_separated_rel_keywords ) && '0' !== $space_separated_rel_keywords;

	if ( empty( $html ) || $has_no_keywords ) {
		return $html;
	}

	/*
	 * Internal links must not gain `nofollow`. When `nofollow` is among the
	 * requested keywords, a second keyword list without it is prepared here
	 * once, and used below for every internal link.
	 */
	$without_nofollow = $space_separated_rel_keywords;
	$adding_no_follow = false;

	/*
	 * Although this could falsely match on longer tokens like `nofollowers`,
	 * it’s safe to check generously since the token comparison below ensures
	 * that only an exact `nofollow` is removed; the only cost of a false
	 * match is a bit of unnecessary processing.
	 */
	if ( str_contains( $space_separated_rel_keywords, 'nofollow' ) ) {
		$kept_tokens = array();

		foreach ( WP_HTML_Attribute::from_unordered_set_of_space_separated_tokens( $space_separated_rel_keywords ) as $token ) {
			if ( 'nofollow' === $token ) {
				$adding_no_follow = true;
			} else {
				$kept_tokens[] = $token;
			}
		}

		$without_nofollow = implode( ' ', $kept_tokens );
	}

	// Update the `rel` attributes in every `A` element.
	$processor = new WP_HTML_Tag_Processor( $html );
	while ( $processor->next_tag( 'A' ) ) {
		// A missing or valueless `rel` is treated as having no keywords.
		$rel = $processor->get_attribute( 'rel' );
		$rel = is_string( $rel ) ? $rel : '';

		// The `href` only matters when `nofollow` is being added.
		$href          = $adding_no_follow ? $processor->get_attribute( 'href' ) : null;
		$skip_nofollow = is_string( $href ) && wp_is_internal_link( $href );

		// Existing keywords come first so that their order is preserved.
		$combined = $skip_nofollow
			? "{$rel} {$without_nofollow}"
			: "{$rel} {$space_separated_rel_keywords}";

		$tokens = WP_HTML_Attribute::from_unordered_set_of_space_separated_tokens( $combined );

		// NOTE(review): `false` is expected to remove the attribute; confirm against WP_HTML_Tag_Processor::set_attribute().
		$new_rel = empty( $tokens ) ? false : implode( ' ', $tokens );

		$processor->set_attribute( 'rel', $new_rel );
	}

	return $processor->get_updated_html();
}
32523299

32533300
/**
@@ -3261,13 +3308,7 @@ function wp_rel_callback( $matches, $rel ) {
32613308
function wp_rel_nofollow( $text ) {
	// Pre-save filters receive already-escaped text: unslash it, update the links, then slash it again.
	$unslashed = stripslashes( $text );
	$updated   = wp_include_in_all_a_rel( $unslashed, 'nofollow' );

	return wp_slash( $updated );
}
32733314

@@ -3281,6 +3322,11 @@ static function ( $matches ) {
32813322
* @return string HTML A Element with `rel="nofollow"`.
32823323
*/
32833324
function wp_rel_nofollow_callback( $matches ) {
	_deprecated_function(
		__FUNCTION__,
		'{WP_VERSION}',
		'wp_include_in_all_a_rel()'
	);

	/*
	 * Delegate straight to the replacement function. Going through
	 * wp_rel_callback() would produce the same HTML, but that function is
	 * deprecated as well and would raise a second deprecation notice for
	 * this single call.
	 */
	return wp_include_in_all_a_rel( $matches[0], 'nofollow' );
}
32863332

@@ -3295,13 +3341,7 @@ function wp_rel_nofollow_callback( $matches ) {
32953341
function wp_rel_ugc( $text ) {
	// Pre-save filters receive already-escaped text: unslash it, update the links, then slash it again.
	$unslashed = stripslashes( $text );
	$updated   = wp_include_in_all_a_rel( $unslashed, 'nofollow ugc' );

	return wp_slash( $updated );
}
33073347

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
<?php
2+
3+
class WP_HTML_Attribute {
	/**
	 * Parses an attribute value as an unordered set of unique space-separated tokens.
	 *
	 * Each distinct token is returned once, in the order of its first
	 * appearance in the given attribute value. In case-insensitive mode the
	 * whole value is lowercased before parsing, so the returned tokens are
	 * all lowercase.
	 *
	 * Example:
	 *
	 *     array( 'a', 'b', 'c' ) === WP_HTML_Attribute::from_unordered_set_of_space_separated_tokens( "a b a\t\nc" );
	 *
	 * > A set of space-separated tokens is a string containing zero or more
	 * > words (known as tokens) separated by one or more ASCII whitespace,
	 * > where words consist of any string of one or more characters, none
	 * > of which are ASCII whitespace.
	 *
	 * > An unordered set of unique space-separated tokens is a set of
	 * > space-separated tokens where none of the tokens are duplicated.
	 *
	 * > How tokens in a set of space-separated tokens are to be compared
	 * > (e.g. case-sensitively or not) is defined on a per-set basis.
	 *
	 * @see https://html.spec.whatwg.org/#unordered-set-of-unique-space-separated-tokens
	 *
	 * @since {WP_VERSION}
	 *
	 * @param string $attribute_value  HTML-decoded attribute value to parse.
	 * @param string $case_sensitivity Optional. Constrain uniqueness with 'case-sensitive'
	 *                                 or 'case-insensitive'. Default 'case-sensitive'.
	 * @return string[] Set of unique tokens parsed from attribute value.
	 */
	public static function from_unordered_set_of_space_separated_tokens( $attribute_value, $case_sensitivity = 'case-sensitive' ) {
		/*
		 * `empty()` alone would also reject the string "0", which is a valid
		 * one-character token, so that value is explicitly let through.
		 */
		if ( empty( $attribute_value ) && '0' !== $attribute_value ) {
			return array();
		}

		if ( 'case-insensitive' === $case_sensitivity ) {
			$attribute_value = strtolower( $attribute_value );
		}

		$tokens = array();
		$seen   = array(); // Tokens already emitted, stored as keys for constant-time lookup.
		$at     = 0;
		$end    = strlen( $attribute_value );
		while ( $at < $end ) {
			// Skip the run of ASCII whitespace before the next token.
			$at += strspn( $attribute_value, " \t\f\r\n", $at );

			// The token extends up to the next ASCII whitespace or the end of the value.
			$word_length = strcspn( $attribute_value, " \t\f\r\n", $at );
			if ( 0 === $word_length ) {
				// Only trailing whitespace remained.
				break;
			}

			$word = substr( $attribute_value, $at, $word_length );

			if ( ! isset( $seen[ $word ] ) ) {
				$seen[ $word ] = true;
				$tokens[]      = $word;
			}

			$at += $word_length;
		}

		return $tokens;
	}
}

src/wp-settings.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@
266266
require ABSPATH . WPINC . '/html-api/class-wp-html-stack-event.php';
267267
require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php';
268268
require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php';
269+
require ABSPATH . WPINC . '/html-api/class-wp-html-attribute.php';
269270
require ABSPATH . WPINC . '/class-wp-http.php';
270271
require ABSPATH . WPINC . '/class-wp-http-streams.php';
271272
require ABSPATH . WPINC . '/class-wp-http-curl.php';

0 commit comments

Comments
 (0)