Anchor texturize to shortcodes to improve regex efficiency.

For the 4.0 branch; see [30449] for trunk.

props miqrogroove.
see #29557 for segfault issues.

Built from https://develop.svn.wordpress.org/branches/4.0@30450


git-svn-id: http://core.svn.wordpress.org/branches/4.0@30444 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Andrew Nacin 2014-11-20 14:30:37 +00:00
parent be45cc1d6a
commit d9cd72430f
1 changed files with 21 additions and 8 deletions

View File

@ -28,7 +28,7 @@
* @return string The string replaced with html entities
*/
function wptexturize($text, $reset = false) {
global $wp_cockneyreplace;
global $wp_cockneyreplace, $shortcode_tags;
static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements,
$default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true;
@ -205,21 +205,34 @@ function wptexturize($text, $reset = false) {
// Look for shortcodes and HTML elements.
$tagnames = array_keys( $shortcode_tags );
$tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) );
$tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex().
$comment_regex =
'!' // Start of comment, after the <.
. '(?:' // Unroll the loop: Consume everything until --> is found.
. '-(?!->)' // Dash not followed by end of comment.
. '[^\-]*+' // Consume non-dashes.
. ')*+' // Loop possessively.
. '-->'; // End of comment.
$regex = '/(' // Capture the entire match.
. '<' // Find start of element.
. '(?(?=!--)' // Is this a comment?
. '.+?--\s*>' // Find end of comment
. $comment_regex // Find end of comment
. '|'
. '[^>]+>' // Find end of element
. ')'
. '|'
. '\[' // Find start of shortcode.
. '\[?' // Shortcodes may begin with [[
. '[\/\[]?' // Shortcodes may begin with [/ or [[
. $tagregexp // Only match registered shortcodes, because performance.
. '(?:'
. '[^\[\]<>]' // Shortcodes do not contain other shortcodes.
. '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical.
. '|'
. '<[^>]+>' // HTML elements permitted. Prevents matching ] before >.
. ')++'
. '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >.
. ')*+' // Possessive critical.
. '\]' // Find end of shortcode.
. '\]?' // Shortcodes may end with ]]
. ')/s';
@ -241,12 +254,12 @@ function wptexturize($text, $reset = false) {
continue;
} elseif ( '[' === $first && 1 === preg_match( '/^\[(?:[^\[\]<>]|<[^>]+>)++\]$/', $curl ) ) {
} elseif ( '[' === $first && 1 === preg_match( '/^\[\/?' . $tagregexp . '(?:[^\[\]<>]+|<[^\[\]>]*>)*+\]$/', $curl ) ) {
// This is a shortcode delimiter.
_wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
} elseif ( '[' === $first && 1 === preg_match( '/^\[\[?(?:[^\[\]<>]|<[^>]+>)++\]\]?$/', $curl ) ) {
} elseif ( '[' === $first && 1 === preg_match( '/^\[[\/\[]?' . $tagregexp . '(?:[^\[\]<>]+|<[^\[\]>]*>)*+\]\]?$/', $curl ) ) {
// This is an escaped shortcode delimiter.
// Do not texturize.