The joys of `wptexturize()`:
* Revert parts of [28773] and [28727] and [29748]. * Do not crash PHP. Make the shortcode quantifier possessive to avoid backtracks. * Reduce backtracking in long HTML comments by 100x. * Do not ignore unclosed HTML comments. * Do not break unregistered shortcodes, e.g. `[hello attr="value"]`. * Do not break HTML in shortcode attributes, e.g. `[hello attr="<"]`. * Do not match for shortcodes when there is extra whitespace, e.g. `[ hello ]`. * Add unit tests to show #12690 was not fully resolved. * Tested PHP 5.2.4, 5.2.13, 5.4.32, and 5.5.8. Adds/modifies unit tests. Props miqrogroove. See #29557. Built from https://develop.svn.wordpress.org/trunk@29781 git-svn-id: http://core.svn.wordpress.org/trunk@29553 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
parent
1967dfe898
commit
37a0c36d38
|
@ -28,7 +28,7 @@
|
|||
* @return string The string replaced with html entities
|
||||
*/
|
||||
function wptexturize($text, $reset = false) {
|
||||
global $wp_cockneyreplace, $shortcode_tags;
|
||||
global $wp_cockneyreplace;
|
||||
static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements,
|
||||
$default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true;
|
||||
|
||||
|
@ -205,45 +205,55 @@ function wptexturize($text, $reset = false) {
|
|||
|
||||
// Look for shortcodes and HTML elements.
|
||||
|
||||
$tagnames = array_keys( $shortcode_tags );
|
||||
$tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) );
|
||||
$tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex().
|
||||
$comment_regex =
|
||||
'!' // Start of comment, after the <.
|
||||
. '(?:' // Unroll the loop: Consume everything until --> is found.
|
||||
. '-(?!->)' // Dash not followed by end of comment.
|
||||
. '[^\-]*+' // Consume non-dashes.
|
||||
. ')*+' // Loop possessively.
|
||||
. '(?:-->)?'; // End of comment. If not found, match all input.
|
||||
|
||||
$regex = '/(' // Capture the entire match.
|
||||
. '<' // Find start of element.
|
||||
. '(?(?=!--)' // Is this a comment?
|
||||
. '.+?--\s*>' // Find end of comment
|
||||
$shortcode_regex =
|
||||
'\[' // Find start of shortcode.
|
||||
. '[\/\[]?' // Shortcodes may begin with [/ or [[
|
||||
. '[^\s\/\[\]]' // No whitespace before name.
|
||||
. '[^\[\]]*+' // Shortcodes do not contain other shortcodes. Possessive critical.
|
||||
. '\]' // Find end of shortcode.
|
||||
. '\]?'; // Shortcodes may end with ]]
|
||||
|
||||
$regex =
|
||||
'/(' // Capture the entire match.
|
||||
. '<' // Find start of element.
|
||||
. '(?(?=!--)' // Is this a comment?
|
||||
. $comment_regex // Find end of comment.
|
||||
. '|'
|
||||
. '[^>]+>' // Find end of element.
|
||||
. ')'
|
||||
. '|'
|
||||
. '[^>]+>' // Find end of element
|
||||
. ')'
|
||||
. '|'
|
||||
. '\[' // Find start of shortcode.
|
||||
. '\[?' // Shortcodes may begin with [[
|
||||
. '\/?' // Closing slash may precede name.
|
||||
. $tagregexp // Only match registered shortcodes, because performance.
|
||||
. '[^\[\]]*' // Shortcodes do not contain other shortcodes.
|
||||
. '\]' // Find end of shortcode.
|
||||
. '\]?' // Shortcodes may end with ]]
|
||||
. ')/s';
|
||||
. $shortcode_regex // Find shortcodes.
|
||||
. ')/s';
|
||||
|
||||
$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
|
||||
|
||||
foreach ( $textarr as &$curl ) {
|
||||
// Only call _wptexturize_pushpop_element if $curl is a delimiter.
|
||||
$first = $curl[0];
|
||||
if ( '<' === $first && '>' === substr( $curl, -1 ) ) {
|
||||
// This is an HTML delimiter.
|
||||
if ( '<' === $first && '<!--' === substr( $curl, 0, 4 ) ) {
|
||||
// This is an HTML comment delimeter.
|
||||
|
||||
if ( '<!--' !== substr( $curl, 0, 4 ) ) {
|
||||
_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
|
||||
}
|
||||
continue;
|
||||
|
||||
} elseif ( '<' === $first && '>' === substr( $curl, -1 ) ) {
|
||||
// This is an HTML element delimiter.
|
||||
|
||||
_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
|
||||
|
||||
} elseif ( '' === trim( $curl ) ) {
|
||||
// This is a newline between delimiters. Performance improves when we check this.
|
||||
|
||||
continue;
|
||||
|
||||
} elseif ( '[' === $first && 1 === preg_match( '/^\[\[?\/?' . $tagregexp . '[^\[\]]*\]\]?$/', $curl ) ) {
|
||||
} elseif ( '[' === $first && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
|
||||
// This is a shortcode delimiter.
|
||||
|
||||
if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) {
|
||||
|
|
|
@ -231,7 +231,7 @@ function get_shortcode_regex() {
|
|||
$tagregexp = join( '|', array_map('preg_quote', $tagnames) );
|
||||
|
||||
// WARNING! Do not change this regex without changing do_shortcode_tag() and strip_shortcode_tag()
|
||||
// Also, see shortcode_unautop() and shortcode.js and wptexturize().
|
||||
// Also, see shortcode_unautop() and shortcode.js.
|
||||
return
|
||||
'\\[' // Opening bracket
|
||||
. '(\\[?)' // 1: Optional second opening bracket for escaping shortcodes: [[tag]]
|
||||
|
|
Loading…
Reference in New Issue