Shortcodes: Fix PCRE performance bugs in `get_shortcode_regex()` and in the related functions `wptexturize()`, `do_shortcode()`, and `strip_shortcodes()`
Alters unit tests. Props miqrogroove. Fixes #33517. Built from https://develop.svn.wordpress.org/trunk@34747 git-svn-id: http://core.svn.wordpress.org/trunk@34712 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
parent
a985977856
commit
99347fd96e
|
@ -216,9 +216,24 @@ function wptexturize( $text, $reset = false ) {
|
||||||
|
|
||||||
// Look for shortcodes and HTML elements.
|
// Look for shortcodes and HTML elements.
|
||||||
|
|
||||||
$tagnames = array_keys( $shortcode_tags );
|
preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20]++)@', $text, $matches );
|
||||||
$tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) );
|
$tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] );
|
||||||
$tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex().
|
$found_shortcodes = ! empty( $tagnames );
|
||||||
|
if ( $found_shortcodes ) {
|
||||||
|
$tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) );
|
||||||
|
$tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex().
|
||||||
|
$shortcode_regex =
|
||||||
|
'\[' // Find start of shortcode.
|
||||||
|
. '[\/\[]?' // Shortcodes may begin with [/ or [[
|
||||||
|
. $tagregexp // Only match registered shortcodes, because performance.
|
||||||
|
. '(?:'
|
||||||
|
. '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical.
|
||||||
|
. '|'
|
||||||
|
. '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >.
|
||||||
|
. ')*+' // Possessive critical.
|
||||||
|
. '\]' // Find end of shortcode.
|
||||||
|
. '\]?'; // Shortcodes may end with ]]
|
||||||
|
}
|
||||||
|
|
||||||
$comment_regex =
|
$comment_regex =
|
||||||
'!' // Start of comment, after the <.
|
'!' // Start of comment, after the <.
|
||||||
|
@ -228,51 +243,39 @@ function wptexturize( $text, $reset = false ) {
|
||||||
. ')*+' // Loop possessively.
|
. ')*+' // Loop possessively.
|
||||||
. '(?:-->)?'; // End of comment. If not found, match all input.
|
. '(?:-->)?'; // End of comment. If not found, match all input.
|
||||||
|
|
||||||
$shortcode_regex =
|
$html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap.
|
||||||
'\[' // Find start of shortcode.
|
'<' // Find start of element.
|
||||||
. '[\/\[]?' // Shortcodes may begin with [/ or [[
|
. '(?(?=!--)' // Is this a comment?
|
||||||
. $tagregexp // Only match registered shortcodes, because performance.
|
. $comment_regex // Find end of comment.
|
||||||
. '(?:'
|
|
||||||
. '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical.
|
|
||||||
. '|'
|
. '|'
|
||||||
. '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >.
|
. '[^>]*>?' // Find end of element. If not found, match all input.
|
||||||
. ')*+' // Possessive critical.
|
. ')';
|
||||||
. '\]' // Find end of shortcode.
|
|
||||||
. '\]?'; // Shortcodes may end with ]]
|
|
||||||
|
|
||||||
$regex =
|
if ( $found_shortcodes ) {
|
||||||
'/(' // Capture the entire match.
|
$regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/s';
|
||||||
. '<' // Find start of element.
|
} else {
|
||||||
. '(?(?=!--)' // Is this a comment?
|
$regex = '/(' . $html_regex . ')/s';
|
||||||
. $comment_regex // Find end of comment.
|
}
|
||||||
. '|'
|
|
||||||
. '[^>]*>' // Find end of element.
|
|
||||||
. ')'
|
|
||||||
. '|'
|
|
||||||
. $shortcode_regex // Find shortcodes.
|
|
||||||
. ')/s';
|
|
||||||
|
|
||||||
$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
|
$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
|
||||||
|
|
||||||
foreach ( $textarr as &$curl ) {
|
foreach ( $textarr as &$curl ) {
|
||||||
// Only call _wptexturize_pushpop_element if $curl is a delimiter.
|
// Only call _wptexturize_pushpop_element if $curl is a delimiter.
|
||||||
$first = $curl[0];
|
$first = $curl[0];
|
||||||
if ( '<' === $first && '<!--' === substr( $curl, 0, 4 ) ) {
|
if ( '<' === $first ) {
|
||||||
// This is an HTML comment delimiter.
|
if ( '<!--' === substr( $curl, 0, 4 ) ) {
|
||||||
|
// This is an HTML comment delimiter.
|
||||||
continue;
|
continue;
|
||||||
|
} else {
|
||||||
} elseif ( '<' === $first && '>' === substr( $curl, -1 ) ) {
|
// This is an HTML element delimiter.
|
||||||
// This is an HTML element delimiter.
|
_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
|
||||||
|
}
|
||||||
_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
|
|
||||||
|
|
||||||
} elseif ( '' === trim( $curl ) ) {
|
} elseif ( '' === trim( $curl ) ) {
|
||||||
// This is a newline between delimiters. Performance improves when we check this.
|
// This is a newline between delimiters. Performance improves when we check this.
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
} elseif ( '[' === $first && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
|
} elseif ( '[' === $first && $found_shortcodes && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
|
||||||
// This is a shortcode delimiter.
|
// This is a shortcode delimiter.
|
||||||
|
|
||||||
if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) {
|
if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) {
|
||||||
|
|
|
@ -208,18 +208,17 @@ function do_shortcode( $content, $ignore_html = false ) {
|
||||||
if (empty($shortcode_tags) || !is_array($shortcode_tags))
|
if (empty($shortcode_tags) || !is_array($shortcode_tags))
|
||||||
return $content;
|
return $content;
|
||||||
|
|
||||||
$tagnames = array_keys($shortcode_tags);
|
// Find all registered tag names in $content.
|
||||||
$tagregexp = join( '|', array_map('preg_quote', $tagnames) );
|
preg_match_all( '@\[([^<>&/\[\]\x00-\x20]++)@', $content, $matches );
|
||||||
$pattern = "/\\[($tagregexp)/s";
|
$tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] );
|
||||||
|
|
||||||
if ( 1 !== preg_match( $pattern, $content ) ) {
|
if ( empty( $tagnames ) ) {
|
||||||
// Avoids parsing HTML when there are no shortcodes or embeds anyway.
|
|
||||||
return $content;
|
return $content;
|
||||||
}
|
}
|
||||||
|
|
||||||
$content = do_shortcodes_in_html_tags( $content, $ignore_html );
|
$content = do_shortcodes_in_html_tags( $content, $ignore_html, $tagnames );
|
||||||
|
|
||||||
$pattern = get_shortcode_regex();
|
$pattern = get_shortcode_regex( $tagnames );
|
||||||
$content = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
|
$content = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
|
||||||
|
|
||||||
// Always restore square braces so we don't break things like <!--[if IE ]>
|
// Always restore square braces so we don't break things like <!--[if IE ]>
|
||||||
|
@ -247,11 +246,15 @@ function do_shortcode( $content, $ignore_html = false ) {
|
||||||
*
|
*
|
||||||
* @global array $shortcode_tags
|
* @global array $shortcode_tags
|
||||||
*
|
*
|
||||||
|
* @param array $tagnames List of shortcodes to find. Optional. Defaults to all registered shortcodes.
|
||||||
* @return string The shortcode search regular expression
|
* @return string The shortcode search regular expression
|
||||||
*/
|
*/
|
||||||
function get_shortcode_regex() {
|
function get_shortcode_regex( $tagnames = null ) {
|
||||||
global $shortcode_tags;
|
global $shortcode_tags;
|
||||||
$tagnames = array_keys($shortcode_tags);
|
|
||||||
|
if ( empty( $tagnames ) ) {
|
||||||
|
$tagnames = array_keys( $shortcode_tags );
|
||||||
|
}
|
||||||
$tagregexp = join( '|', array_map('preg_quote', $tagnames) );
|
$tagregexp = join( '|', array_map('preg_quote', $tagnames) );
|
||||||
|
|
||||||
// WARNING! Do not change this regex without changing do_shortcode_tag() and strip_shortcode_tag()
|
// WARNING! Do not change this regex without changing do_shortcode_tag() and strip_shortcode_tag()
|
||||||
|
@ -337,15 +340,16 @@ function do_shortcode_tag( $m ) {
|
||||||
*
|
*
|
||||||
* @param string $content Content to search for shortcodes
|
* @param string $content Content to search for shortcodes
|
||||||
* @param bool $ignore_html When true, all square braces inside elements will be encoded.
|
* @param bool $ignore_html When true, all square braces inside elements will be encoded.
|
||||||
|
* @param array $tagnames List of shortcodes to find.
|
||||||
* @return string Content with shortcodes filtered out.
|
* @return string Content with shortcodes filtered out.
|
||||||
*/
|
*/
|
||||||
function do_shortcodes_in_html_tags( $content, $ignore_html ) {
|
function do_shortcodes_in_html_tags( $content, $ignore_html, $tagnames ) {
|
||||||
// Normalize entities in unfiltered HTML before adding placeholders.
|
// Normalize entities in unfiltered HTML before adding placeholders.
|
||||||
$trans = array( '[' => '[', ']' => ']' );
|
$trans = array( '[' => '[', ']' => ']' );
|
||||||
$content = strtr( $content, $trans );
|
$content = strtr( $content, $trans );
|
||||||
$trans = array( '[' => '[', ']' => ']' );
|
$trans = array( '[' => '[', ']' => ']' );
|
||||||
|
|
||||||
$pattern = get_shortcode_regex();
|
$pattern = get_shortcode_regex( $tagnames );
|
||||||
$textarr = wp_html_split( $content );
|
$textarr = wp_html_split( $content );
|
||||||
|
|
||||||
foreach ( $textarr as &$element ) {
|
foreach ( $textarr as &$element ) {
|
||||||
|
@ -557,9 +561,17 @@ function strip_shortcodes( $content ) {
|
||||||
if (empty($shortcode_tags) || !is_array($shortcode_tags))
|
if (empty($shortcode_tags) || !is_array($shortcode_tags))
|
||||||
return $content;
|
return $content;
|
||||||
|
|
||||||
$content = do_shortcodes_in_html_tags( $content, true );
|
// Find all registered tag names in $content.
|
||||||
|
preg_match_all( '@\[([^<>&/\[\]\x00-\x20]++)@', $content, $matches );
|
||||||
|
$tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] );
|
||||||
|
|
||||||
$pattern = get_shortcode_regex();
|
if ( empty( $tagnames ) ) {
|
||||||
|
return $content;
|
||||||
|
}
|
||||||
|
|
||||||
|
$content = do_shortcodes_in_html_tags( $content, true, $tagnames );
|
||||||
|
|
||||||
|
$pattern = get_shortcode_regex( $tagnames );
|
||||||
$content = preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
|
$content = preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
|
||||||
|
|
||||||
// Always restore square braces so we don't break things like <!--[if IE ]>
|
// Always restore square braces so we don't break things like <!--[if IE ]>
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
*
|
*
|
||||||
* @global string $wp_version
|
* @global string $wp_version
|
||||||
*/
|
*/
|
||||||
$wp_version = '4.4-alpha-34746';
|
$wp_version = '4.4-alpha-34747';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
||||||
|
|
Loading…
Reference in New Issue