Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 72 additions & 32 deletions src/wp-includes/formatting.php
Original file line number Diff line number Diff line change
Expand Up @@ -3220,34 +3220,81 @@ function _split_str_by_whitespace( $text, $goal ) {
* @return string HTML A element with the added rel attribute.
*/
function wp_rel_callback( $matches, $rel ) {
	// Deprecated shim: report the replacement, then delegate to it.
	_deprecated_function(
		__FUNCTION__,
		'{WP_VERSION}',
		'wp_include_in_all_a_rel()'
	);

	/*
	 * `$matches[0]` holds the complete matched `<a …>` opening tag, which is
	 * what the replacement expects. The attribute list in `$matches[1]` no
	 * longer needs to be parsed here.
	 */
	return wp_include_in_all_a_rel( $matches[0], $rel );
}

if ( ! empty( $atts['href'] ) && wp_is_internal_link( $atts['href']['value'] ) ) {
$rel = trim( str_replace( 'nofollow', '', $rel ) );
/**
* Ensures that all A elements in the given HTML contain
* the provided and unique “rel” keywords.
*
* Example:
*
* `<a rel="nofollow">` === wp_include_in_all_a_rel( '<a>', 'nofollow' );
* `<a rel="nofollow">` === wp_include_in_all_a_rel( '<a rel="nofollow">', 'nofollow' );
* `<a rel="pingback nofollow">` === wp_include_in_all_a_rel( '<a rel="pingback">', 'nofollow' );
* `<a rel="a b c">` === wp_include_in_all_a_rel( '<a rel="a a a">', 'a a a b b c' );
*
* @since {WP_VERSION}
*
* @param string $html Add the given `rel` keywords to every `A` tag in this HTML.
* @param string $space_separated_rel_keywords Each of these keywords will be present in the final HTML.
* @return string Modified HTML with all `A` tags containing the given `rel` keywords.
*/
function wp_include_in_all_a_rel( $html, $space_separated_rel_keywords ) {
	// Nothing to scan, or nothing to add: hand the input back untouched.
	if ( empty( $html ) || empty( $space_separated_rel_keywords ) ) {
		return $html;
	}

	/*
	 * Internal links must not receive `nofollow`. Prepare a copy of the
	 * requested keywords with `nofollow` removed so it can be used in place
	 * of the full list for those links, and remember whether `nofollow` was
	 * requested at all so the `href` is only inspected when it matters.
	 */
	$without_nofollow = $space_separated_rel_keywords;
	$adding_no_follow = false;

	/*
	 * Although this could falsely match on longer tokens like `nofollowers`,
	 * it's safe to check generously since the parsing will ensure that only
	 * `nofollow` is removed; only a bit of unnecessary processing will occur.
	 */
	if ( str_contains( $without_nofollow, 'nofollow' ) ) {
		$tokens           = WP_HTML_Attribute::from_unordered_set_of_space_separated_tokens( $without_nofollow );
		$without_nofollow = '';

		foreach ( $tokens as $token ) {
			if ( 'nofollow' === $token ) {
				$adding_no_follow = true;
			} else {
				$without_nofollow .= " {$token}";
			}
		}
	}

	// Update the `rel` attributes in every `A` element.
	$processor = new WP_HTML_Tag_Processor( $html );
	while ( $processor->next_tag( 'A' ) ) {
		// A missing or boolean `rel` attribute contributes no existing keywords.
		$rel = $processor->get_attribute( 'rel' );
		$rel = is_string( $rel ) ? $rel : '';

		// Only look at the link target when `nofollow` is among the keywords being added.
		$href          = $adding_no_follow ? $processor->get_attribute( 'href' ) : null;
		$skip_nofollow = is_string( $href ) && wp_is_internal_link( $href );

		// Existing keywords come first so their order is preserved in the output.
		$combined = $skip_nofollow
			? "{$rel} {$without_nofollow}"
			: "{$rel} {$space_separated_rel_keywords}";

		// Parsing the combined string removes duplicates and normalizes whitespace.
		$tokens = WP_HTML_Attribute::from_unordered_set_of_space_separated_tokens( $combined );

		// NOTE(review): `false` is expected to remove the attribute; confirm against WP_HTML_Tag_Processor::set_attribute().
		$new_rel = empty( $tokens ) ? false : implode( ' ', $tokens );

		$processor->set_attribute( 'rel', $new_rel );
	}

	return $processor->get_updated_html();
}

/**
Expand All @@ -3261,13 +3308,7 @@ function wp_rel_callback( $matches, $rel ) {
function wp_rel_nofollow( $text ) {
	// This is a pre-save filter, so text is already escaped.
	$text = stripslashes( $text );

	/*
	 * A single pass adds `nofollow` to every `A` element. No separate regex
	 * pass through the deprecated wp_rel_callback() is needed; it would
	 * repeat the work and raise a deprecation notice for each link.
	 */
	$text = wp_include_in_all_a_rel( $text, 'nofollow' );

	// Restore the slashes expected by the rest of the pre-save pipeline.
	return wp_slash( $text );
}

Expand All @@ -3281,6 +3322,11 @@ static function ( $matches ) {
* @return string HTML A Element with `rel="nofollow"`.
*/
function wp_rel_nofollow_callback( $matches ) {
	_deprecated_function(
		__FUNCTION__,
		'{WP_VERSION}',
		'wp_include_in_all_a_rel()'
	);

	/*
	 * Call the replacement directly instead of going through
	 * wp_rel_callback(): that function is deprecated as well and would raise
	 * a second notice naming a function the caller never invoked.
	 * `$matches[0]` is the complete matched `<a …>` opening tag.
	 */
	return wp_include_in_all_a_rel( $matches[0], 'nofollow' );
}

Expand All @@ -3295,13 +3341,7 @@ function wp_rel_nofollow_callback( $matches ) {
function wp_rel_ugc( $text ) {
	// This is a pre-save filter, so text is already escaped.
	$text = stripslashes( $text );

	/*
	 * A single pass adds `nofollow` and `ugc` to every `A` element. No
	 * separate regex pass through the deprecated wp_rel_callback() is needed;
	 * it would repeat the work and raise a deprecation notice for each link.
	 */
	$text = wp_include_in_all_a_rel( $text, 'nofollow ugc' );

	// Restore the slashes expected by the rest of the pre-save pipeline.
	return wp_slash( $text );
}

Expand Down
64 changes: 64 additions & 0 deletions src/wp-includes/html-api/class-wp-html-attribute.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
<?php

/**
 * Static helpers for interpreting HTML attribute values.
 *
 * @since {WP_VERSION}
 */
class WP_HTML_Attribute {
	/**
	 * Parses and returns an unordered set of space-separated tokens.
	 *
	 * Tokens in the returned array appear in the same order as they are uniquely
	 * found in the given attribute value string. When case-insensitive, output
	 * tokens will all be lowercased via `strtolower()`.
	 *
	 * Example:
	 *
	 *     array( 'a', 'b', 'c' ) === WP_HTML_Attribute::from_unordered_set_of_space_separated_tokens( "a b a\t\nc" );
	 *     array( '0' ) === WP_HTML_Attribute::from_unordered_set_of_space_separated_tokens( '0' );
	 *
	 * > A set of space-separated tokens is a string containing zero or more
	 * > words (known as tokens) separated by one or more ASCII whitespace,
	 * > where words consist of any string of one or more characters, none
	 * > of which are ASCII whitespace.
	 *
	 * > An unordered set of unique space-separated tokens is a set of
	 * > space-separated tokens where none of the tokens are duplicated.
	 *
	 * > How tokens in a set of space-separated tokens are to be compared
	 * > (e.g. case-sensitively or not) is defined on a per-set basis.
	 *
	 * @see https://html.spec.whatwg.org/#unordered-set-of-unique-space-separated-tokens
	 *
	 * @since {WP_VERSION}
	 *
	 * @param string $attribute_value  HTML-decoded attribute value to parse. Non-string
	 *                                 input yields an empty set.
	 * @param string $case_sensitivity Optional. Constrain uniqueness with 'case-sensitive'
	 *                                 or 'case-insensitive'. Default 'case-sensitive'.
	 * @return string[] Set of unique tokens parsed from attribute value.
	 */
	public static function from_unordered_set_of_space_separated_tokens( $attribute_value, $case_sensitivity = 'case-sensitive' ) {
		/*
		 * Compare against the empty string explicitly: `empty()` also reports
		 * the string "0" as empty, yet "0" is a valid single token.
		 */
		if ( ! is_string( $attribute_value ) || '' === $attribute_value ) {
			return array();
		}

		if ( 'case-insensitive' === $case_sensitivity ) {
			$attribute_value = strtolower( $attribute_value );
		}

		// ASCII whitespace: space, tab, form feed, carriage return, line feed.
		$whitespace = " \t\f\r\n";

		$tokens = array();

		/*
		 * Every token already seen is stored surrounded by single spaces, so a
		 * substring search for " {$word} " can only match a whole token.
		 */
		$uniques = ' ';
		$at      = 0;
		$end     = strlen( $attribute_value );
		while ( $at < $end ) {
			// Skip the separator run, then measure the word that follows it.
			$at += strspn( $attribute_value, $whitespace, $at );

			$word_length = strcspn( $attribute_value, $whitespace, $at );
			$word        = substr( $attribute_value, $at, $word_length );

			// A zero-length word only occurs after trailing whitespace at the end of input.
			if ( 0 < $word_length && ! str_contains( $uniques, " {$word} " ) ) {
				$uniques .= "{$word} ";
				$tokens[] = $word;
			}

			$at += $word_length;
		}

		return $tokens;
	}
}
155 changes: 28 additions & 127 deletions src/wp-includes/kses.php
Original file line number Diff line number Diff line change
Expand Up @@ -1385,149 +1385,50 @@ function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowe
* attribute defined first (`foo='bar' foo='baz'` will result in `foo='bar'`).
*
* @since 1.0.0
* @since 6.9.0 Rebuilt on HTML API
*
* @param string $attr Attribute list from HTML element to closing HTML element tag.
* @param string[] $allowed_protocols Array of allowed URL protocols.
* @return array[] Array of attribute information after parsing.
*/
function wp_kses_hair( $attr, $allowed_protocols ) {
	$attributes = array();
	$uris       = wp_kses_uri_attributes();

	/*
	 * Characters replaced with character references when the attribute
	 * value is re-serialized inside double quotes. The table does not
	 * change per attribute, so it is built once up front.
	 */
	$syntax_characters = array(
		'&' => '&amp;',
		'<' => '&lt;',
		'>' => '&gt;',
		"'" => '&apos;',
		'"' => '&quot;',
	);

	// Wrap the attribute list in a placeholder tag so the HTML API can parse it.
	$processor = new WP_HTML_Tag_Processor( "<wp {$attr}>" );
	$processor->next_token();

	/*
	 * NOTE(review): get_attribute_names_with_prefix() is expected to return
	 * `null` when no tag was matched (e.g. unparsable input); confirm against
	 * the HTML API. Guarding here avoids iterating over a non-array.
	 */
	$names = $processor->get_attribute_names_with_prefix( '' );
	if ( ! is_array( $names ) ) {
		return $attributes;
	}

	foreach ( $names as $name ) {
		$value = $processor->get_attribute( $name );

		// A value of `true` marks a valueless (boolean) attribute.
		$is_bool = true === $value;

		// Strip disallowed protocols from attributes known to carry URIs.
		if ( is_string( $value ) && in_array( $name, $uris, true ) ) {
			$value = wp_kses_bad_protocol( $value, $allowed_protocols );
		}

		// Reconstruct and normalize the attribute value.
		$recoded = $is_bool ? '' : strtr( $value, $syntax_characters );
		$whole   = $is_bool ? $name : "{$name}=\"{$recoded}\"";

		// @todo What security issue need review on the names?
		$attributes[ $name ] = array(
			'name'  => $name,
			'value' => $recoded,
			'whole' => $whole,
			'vless' => $is_bool ? 'y' : 'n',
		);
	}

	return $attributes;
}

/**
Expand Down
1 change: 1 addition & 0 deletions src/wp-settings.php
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@
require ABSPATH . WPINC . '/html-api/class-wp-html-stack-event.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-attribute.php';
require ABSPATH . WPINC . '/class-wp-http.php';
require ABSPATH . WPINC . '/class-wp-http-streams.php';
require ABSPATH . WPINC . '/class-wp-http-curl.php';
Expand Down
Loading
Loading