Formatting: Improve accuracy of `force_balance_tags()` and add support for custom element tags.
This changeset substantially reworks the regular expression used to balance tags and adds comprehensive test coverage to confirm that each scenario is either handled or deliberately left unsupported, as expected.
Props dmsnell, westonruter, birgire.
Fixes #47014.
Built from https://develop.svn.wordpress.org/trunk@45929
git-svn-id: http://core.svn.wordpress.org/trunk@45740 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
parent
b55ca66759
commit
4089fad5fb
|
@@ -2429,7 +2429,7 @@ function convert_invalid_entities( $content ) {
|
||||||
* @return string Balanced text
|
* @return string Balanced text
|
||||||
*/
|
*/
|
||||||
function balanceTags( $text, $force = false ) { // phpcs:ignore WordPress.NamingConventions.ValidFunctionName.FunctionNameInvalid
|
function balanceTags( $text, $force = false ) { // phpcs:ignore WordPress.NamingConventions.ValidFunctionName.FunctionNameInvalid
|
||||||
if ( $force || get_option( 'use_balanceTags' ) == 1 ) {
|
if ( $force || (int) get_option( 'use_balanceTags' ) === 1 ) {
|
||||||
return force_balance_tags( $text );
|
return force_balance_tags( $text );
|
||||||
} else {
|
} else {
|
||||||
return $text;
|
return $text;
|
||||||
|
@@ -2440,6 +2440,7 @@ function balanceTags( $text, $force = false ) { // phpcs:ignore WordPress.Namin
|
||||||
* Balances tags of string using a modified stack.
|
* Balances tags of string using a modified stack.
|
||||||
*
|
*
|
||||||
* @since 2.0.4
|
* @since 2.0.4
|
||||||
|
* @since 5.3.0 Improve accuracy and add support for custom element tags.
|
||||||
*
|
*
|
||||||
* @author Leonard Lin <leonard@acm.org>
|
* @author Leonard Lin <leonard@acm.org>
|
||||||
* @license GPL
|
* @license GPL
|
||||||
|
@@ -2469,32 +2470,74 @@ function force_balance_tags( $text ) {
|
||||||
// WP bug fix for LOVE <3 (and other situations with '<' before a number)
|
// WP bug fix for LOVE <3 (and other situations with '<' before a number)
|
||||||
$text = preg_replace( '#<([0-9]{1})#', '&lt;$1', $text );
|
$text = preg_replace( '#<([0-9]{1})#', '&lt;$1', $text );
|
||||||
|
|
||||||
while ( preg_match( '/<(\/?[\w:]*)\s*([^>]*)>/', $text, $regex ) ) {
|
/**
|
||||||
|
* Matches supported tags.
|
||||||
|
*
|
||||||
|
* To get the pattern as a string without the comments paste into a PHP
|
||||||
|
* REPL like `php -a`.
|
||||||
|
*
|
||||||
|
* @see https://html.spec.whatwg.org/#elements-2
|
||||||
|
* @see https://w3c.github.io/webcomponents/spec/custom/#valid-custom-element-name
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* ~# php -a
|
||||||
|
* php > $s = [paste copied contents of expression below including parentheses];
|
||||||
|
* php > echo $s;
|
||||||
|
*/
|
||||||
|
$tag_pattern = (
|
||||||
|
'#<' . // Start with an opening bracket.
|
||||||
|
'(/?)' . // Group 1 - If it's a closing tag it'll have a leading slash.
|
||||||
|
'(' . // Group 2 - Tag name.
|
||||||
|
// Custom element tags have more lenient rules than HTML tag names.
|
||||||
|
'(?:[a-z](?:[a-z0-9._]*)-(?:[a-z0-9._-]+)+)' .
|
||||||
|
'|' .
|
||||||
|
// Traditional tag rules approximate HTML tag names.
|
||||||
|
'(?:[\w:]+)' .
|
||||||
|
')' .
|
||||||
|
'(?:' .
|
||||||
|
// We either immediately close the tag with its '>' and have nothing here.
|
||||||
|
'\s*' .
|
||||||
|
'(/?)' . // Group 3 - "attributes" for empty tag.
|
||||||
|
'|' .
|
||||||
|
// Or we must start with space characters to separate the tag name from the attributes (or whitespace).
|
||||||
|
'(\s+)' . // Group 4 - Pre-attribute whitespace.
|
||||||
|
'([^>]*)' . // Group 5 - Attributes.
|
||||||
|
')' .
|
||||||
|
'>#' // End with a closing bracket.
|
||||||
|
);
|
||||||
|
|
||||||
|
while ( preg_match( $tag_pattern, $text, $regex ) ) {
|
||||||
|
$full_match = $regex[0];
|
||||||
|
$has_leading_slash = ! empty( $regex[1] );
|
||||||
|
$tag_name = $regex[2];
|
||||||
|
$tag = strtolower( $tag_name );
|
||||||
|
$is_single_tag = in_array( $tag, $single_tags, true );
|
||||||
|
$pre_attribute_ws = isset( $regex[4] ) ? $regex[4] : '';
|
||||||
|
$attributes = trim( isset( $regex[5] ) ? $regex[5] : $regex[3] );
|
||||||
|
$has_self_closer = '/' === substr( $attributes, -1 );
|
||||||
|
|
||||||
$newtext .= $tagqueue;
|
$newtext .= $tagqueue;
|
||||||
|
|
||||||
$i = strpos( $text, $regex[0] );
|
$i = strpos( $text, $full_match );
|
||||||
$l = strlen( $regex[0] );
|
$l = strlen( $full_match );
|
||||||
|
|
||||||
// clear the shifter
|
// Clear the shifter.
|
||||||
$tagqueue = '';
|
$tagqueue = '';
|
||||||
// Pop or Push
|
if ( $has_leading_slash ) { // End Tag.
|
||||||
if ( isset( $regex[1][0] ) && '/' == $regex[1][0] ) { // End Tag
|
// If too many closing tags.
|
||||||
$tag = strtolower( substr( $regex[1], 1 ) );
|
|
||||||
// if too many closing tags
|
|
||||||
if ( $stacksize <= 0 ) {
|
if ( $stacksize <= 0 ) {
|
||||||
$tag = '';
|
$tag = '';
|
||||||
// or close to be safe $tag = '/' . $tag;
|
// Or close to be safe $tag = '/' . $tag.
|
||||||
|
|
||||||
// if stacktop value = tag close value then pop
|
// If stacktop value = tag close value, then pop.
|
||||||
} elseif ( $tagstack[ $stacksize - 1 ] == $tag ) { // found closing tag
|
} elseif ( $tagstack[ $stacksize - 1 ] === $tag ) { // Found closing tag.
|
||||||
$tag = '</' . $tag . '>'; // Close Tag
|
$tag = '</' . $tag . '>'; // Close Tag.
|
||||||
// Pop
|
|
||||||
array_pop( $tagstack );
|
array_pop( $tagstack );
|
||||||
$stacksize--;
|
$stacksize--;
|
||||||
} else { // closing tag not at top, search for it
|
} else { // Closing tag not at top, search for it.
|
||||||
for ( $j = $stacksize - 1; $j >= 0; $j-- ) {
|
for ( $j = $stacksize - 1; $j >= 0; $j-- ) {
|
||||||
if ( $tagstack[ $j ] == $tag ) {
|
if ( $tagstack[ $j ] === $tag ) {
|
||||||
// add tag to tagqueue
|
// Add tag to tagqueue.
|
||||||
for ( $k = $stacksize - 1; $k >= $j; $k-- ) {
|
for ( $k = $stacksize - 1; $k >= $j; $k-- ) {
|
||||||
$tagqueue .= '</' . array_pop( $tagstack ) . '>';
|
$tagqueue .= '</' . array_pop( $tagstack ) . '>';
|
||||||
$stacksize--;
|
$stacksize--;
|
||||||
|
@@ -2504,39 +2547,33 @@ function force_balance_tags( $text ) {
|
||||||
}
|
}
|
||||||
$tag = '';
|
$tag = '';
|
||||||
}
|
}
|
||||||
} else { // Begin Tag
|
} else { // Begin Tag.
|
||||||
$tag = strtolower( $regex[1] );
|
if ( $has_self_closer ) { // If it presents itself as a self-closing tag...
|
||||||
|
|
||||||
// Tag Cleaning
|
|
||||||
|
|
||||||
// If it's an empty tag "< >", do nothing
|
|
||||||
if ( '' == $tag ) {
|
|
||||||
// do nothing
|
|
||||||
} elseif ( substr( $regex[2], -1 ) == '/' ) { // ElseIf it presents itself as a self-closing tag...
|
|
||||||
// ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such and
|
// ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such and
|
||||||
// immediately close it with a closing tag (the tag will encapsulate no text as a result)
|
// immediately close it with a closing tag (the tag will encapsulate no text as a result)
|
||||||
if ( ! in_array( $tag, $single_tags ) ) {
|
if ( ! $is_single_tag ) {
|
||||||
$regex[2] = trim( substr( $regex[2], 0, -1 ) ) . "></$tag";
|
$attributes = trim( substr( $attributes, 0, -1 ) ) . "></$tag";
|
||||||
}
|
}
|
||||||
} elseif ( in_array( $tag, $single_tags ) ) { // ElseIf it's a known single-entity tag but it doesn't close itself, do so
|
} elseif ( $is_single_tag ) { // ElseIf it's a known single-entity tag but it doesn't close itself, do so
|
||||||
$regex[2] .= '/';
|
$pre_attribute_ws = ' ';
|
||||||
} else { // Else it's not a single-entity tag
|
$attributes .= '/';
|
||||||
// If the top of the stack is the same as the tag we want to push, close previous tag
|
} else { // It's not a single-entity tag.
|
||||||
if ( $stacksize > 0 && ! in_array( $tag, $nestable_tags ) && $tagstack[ $stacksize - 1 ] == $tag ) {
|
// If the top of the stack is the same as the tag we want to push, close previous tag.
|
||||||
|
if ( $stacksize > 0 && ! in_array( $tag, $nestable_tags, true ) && $tagstack[ $stacksize - 1 ] === $tag ) {
|
||||||
$tagqueue = '</' . array_pop( $tagstack ) . '>';
|
$tagqueue = '</' . array_pop( $tagstack ) . '>';
|
||||||
$stacksize--;
|
$stacksize--;
|
||||||
}
|
}
|
||||||
$stacksize = array_push( $tagstack, $tag );
|
$stacksize = array_push( $tagstack, $tag );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Attributes
|
// Attributes.
|
||||||
$attributes = $regex[2];
|
if ( $has_self_closer && $is_single_tag ) {
|
||||||
if ( ! empty( $attributes ) && $attributes[0] != '>' ) {
|
// We need some space - avoid <br/> and prefer <br />.
|
||||||
$attributes = ' ' . $attributes;
|
$pre_attribute_ws = ' ';
|
||||||
}
|
}
|
||||||
|
|
||||||
$tag = '<' . $tag . $attributes . '>';
|
$tag = '<' . $tag . $pre_attribute_ws . $attributes . '>';
|
||||||
//If already queuing a close tag, then put this tag on, too
|
// If already queuing a close tag, then put this tag on too.
|
||||||
if ( ! empty( $tagqueue ) ) {
|
if ( ! empty( $tagqueue ) ) {
|
||||||
$tagqueue .= $tag;
|
$tagqueue .= $tag;
|
||||||
$tag = '';
|
$tag = '';
|
||||||
|
@@ -2546,18 +2583,17 @@ function force_balance_tags( $text ) {
|
||||||
$text = substr( $text, $i + $l );
|
$text = substr( $text, $i + $l );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear Tag Queue
|
// Clear Tag Queue.
|
||||||
$newtext .= $tagqueue;
|
$newtext .= $tagqueue;
|
||||||
|
|
||||||
// Add Remaining text
|
// Add remaining text.
|
||||||
$newtext .= $text;
|
$newtext .= $text;
|
||||||
|
|
||||||
// Empty Stack
|
|
||||||
while ( $x = array_pop( $tagstack ) ) {
|
while ( $x = array_pop( $tagstack ) ) {
|
||||||
$newtext .= '</' . $x . '>'; // Add remaining tags to close
|
$newtext .= '</' . $x . '>'; // Add remaining tags to close.
|
||||||
}
|
}
|
||||||
|
|
||||||
// WP fix for the bug with HTML comments
|
// WP fix for the bug with HTML comments.
|
||||||
$newtext = str_replace( '< !--', '<!--', $newtext );
|
$newtext = str_replace( '< !--', '<!--', $newtext );
|
||||||
$newtext = str_replace( '<    !--', '< !--', $newtext );
|
$newtext = str_replace( '<    !--', '< !--', $newtext );
|
||||||
|
|
||||||
|
|
|
@@ -13,7 +13,7 @@
|
||||||
*
|
*
|
||||||
* @global string $wp_version
|
* @global string $wp_version
|
||||||
*/
|
*/
|
||||||
$wp_version = '5.3-alpha-45928';
|
$wp_version = '5.3-alpha-45929';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
||||||
|
|
Loading…
Reference in New Issue