@@ -1385,149 +1385,49 @@ function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowe
1385
1385
* attribute defined first (`foo='bar' foo='baz'` will result in `foo='bar'`).
1386
1386
*
1387
1387
* @since 1.0.0
1388
+ * @since 6.9.0 Rebuilt on HTML API
1388
1389
*
1389
1390
* @param string $attr Attribute list from HTML element to closing HTML element tag.
1390
1391
* @param string[] $allowed_protocols Array of allowed URL protocols.
1391
1392
* @return array[] Array of attribute information after parsing.
1392
1393
*/
1393
1394
function wp_kses_hair( $attr, $allowed_protocols ) {
	$attributes = array();
	$uris       = wp_kses_uri_attributes();

	/*
	 * Wrap the attribute list in a synthetic tag so the HTML API can parse
	 * it the way a browser would parse a real opening tag.
	 */
	$processor = new WP_HTML_Tag_Processor( "<wp {$attr}>" );

	/*
	 * If the synthetic tag cannot be matched (for example, an unterminated
	 * quote leaves the tag incomplete), there are no attributes to report.
	 * Guard explicitly: the attribute-name lookup returns null in that case
	 * and iterating over null would raise a warning.
	 */
	$names = $processor->next_token() ? $processor->get_attribute_names_with_prefix( '' ) : null;
	if ( ! is_array( $names ) ) {
		return $attributes;
	}

	// Characters with syntactic meaning in HTML are re-encoded on output.
	$syntax_characters = array(
		'&' => '&amp;',
		'<' => '&lt;',
		'>' => '&gt;',
		"'" => '&apos;',
		'"' => '&quot;',
	);

	// Loop through the whole attribute list.
	foreach ( $names as $name ) {
		$value = $processor->get_attribute( $name );

		// A value of boolean true means the attribute has no value (e.g. `selected`).
		$is_bool = true === $value;

		// URI-bearing attributes are filtered against the allowed protocols.
		if ( is_string( $value ) && in_array( $name, $uris, true ) ) {
			$value = wp_kses_bad_protocol( $value, $allowed_protocols );
		}

		// Reconstruct and normalize the attribute value, always double-quoted.
		$recoded = $is_bool ? '' : strtr( (string) $value, $syntax_characters );
		$whole   = $is_bool ? $name : "{$name}=\"{$recoded}\"";

		/*
		 * Preserve the historical return shape: entries are keyed by the
		 * attribute name and `vless` is the string 'y' or 'n', not a boolean.
		 */
		$attributes[ $name ] = array(
			'name'  => $name,
			'value' => $recoded,
			'whole' => $whole,
			'vless' => $is_bool ? 'y' : 'n',
		);
	}

	return $attributes;
}
1532
1432
1533
1433
/**
0 commit comments