Protect newlines inside of `CDATA`. Replacing them with spaces was breaking things, notably inline JS that used comments for HTML standards compat.

* Tokenize newlines in `WP_Embed::autoembed()` before running `->autoembed_callback()`
* Tokenize newlines with placeholders in `wpautop()` 
* Introduce `wp_html_split()` to DRY the RegEx from `wp_replace_in_html_tags()` and `do_shortcodes_in_html_tags()`

Adds unit tests.

Props miqrogroove, kitchin, azaozz.
Fixes #33106.

Built from https://develop.svn.wordpress.org/trunk@33469


git-svn-id: http://core.svn.wordpress.org/trunk@33436 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Scott Taylor 2015-07-28 23:03:24 +00:00
parent fa25fe82ef
commit 15a7d98ce6
4 changed files with 69 additions and 47 deletions

View File

@ -129,6 +129,12 @@ class WP_Embed {
* `->maybe_make_link()` can return false on failure. * `->maybe_make_link()` can return false on failure.
*/ */
public function shortcode( $attr, $url = '' ) { public function shortcode( $attr, $url = '' ) {
// This filter can be used to output custom HTML instead of allowing oEmbed to run.
$custom = apply_filters( 'wp_embed_shortcode_custom', false, $attr, $url );
if ( false !== $custom ) {
return $custom;
}
$post = get_post(); $post = get_post();
if ( empty( $url ) && ! empty( $attr['src'] ) ) { if ( empty( $url ) && ! empty( $attr['src'] ) ) {
@ -318,11 +324,14 @@ class WP_Embed {
* @return string Potentially modified $content. * @return string Potentially modified $content.
*/ */
public function autoembed( $content ) { public function autoembed( $content ) {
// Strip newlines from all elements. // Replace line breaks from all HTML elements with placeholders.
$content = wp_replace_in_html_tags( $content, array( "\n" => " " ) ); $content = wp_replace_in_html_tags( $content, array( "\n" => '<!-- wp-line-break -->' ) );
// Find URLs that are on their own line. // Find URLs that are on their own line.
return preg_replace_callback( '|^(\s*)(https?://[^\s"]+)(\s*)$|im', array( $this, 'autoembed_callback' ), $content ); $content = preg_replace_callback( '|^(\s*)(https?://[^\s"]+)(\s*)$|im', array( $this, 'autoembed_callback' ), $content );
// Put the line breaks back.
return str_replace( '<!-- wp-line-break -->', "\n", $content );
} }
/** /**

View File

@ -504,8 +504,8 @@ function wpautop( $pee, $br = true ) {
// Standardize newline characters to "\n". // Standardize newline characters to "\n".
$pee = str_replace(array("\r\n", "\r"), "\n", $pee); $pee = str_replace(array("\r\n", "\r"), "\n", $pee);
// Strip newlines from all elements. // Find newlines in all elements and add placeholders.
$pee = wp_replace_in_html_tags( $pee, array( "\n" => " " ) ); $pee = wp_replace_in_html_tags( $pee, array( "\n" => " <!-- wpnl --> " ) );
// Collapse line breaks before and after <option> elements so they don't get autop'd. // Collapse line breaks before and after <option> elements so they don't get autop'd.
if ( strpos( $pee, '<option' ) !== false ) { if ( strpos( $pee, '<option' ) !== false ) {
@ -592,9 +592,59 @@ function wpautop( $pee, $br = true ) {
if ( !empty($pre_tags) ) if ( !empty($pre_tags) )
$pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee); $pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee);
// Restore newlines in all elements.
$pee = str_replace( " <!-- wpnl --> ", "\n", $pee );
return $pee; return $pee;
} }
/**
 * Split a string into HTML elements, comments, CDATA sections, and the text around them.
 *
 * The split is done with preg_split() and PREG_SPLIT_DELIM_CAPTURE, so each
 * matched element, comment, or CDATA section is kept in the result next to
 * the text that surrounded it. Empty strings are not filtered out.
 *
 * @since 4.2.4
 *
 * @param string $input The text to split.
 * @return array The pieces of the input, in their original order.
 */
function wp_html_split( $input ) {
	// The pattern is constant: assemble it on the first call and reuse it afterwards.
	static $pattern = null;

	if ( null === $pattern ) {
		// Everything that follows "<" in an HTML comment.
		$comment_tail = implode( '', array(
			'!',         // Start of comment, after the <.
			'(?:',       // Unrolled loop: consume everything until --> is found.
			'-(?!->)',   // A dash that does not begin the end of the comment.
			'[^\-]*+',   // Consume non-dashes.
			')*+',       // Loop possessively.
			'(?:-->)?',  // End of comment. If not found, match all input.
		) );

		// Everything that follows "<" in a CDATA section.
		$cdata_tail = implode( '', array(
			'!\[CDATA\[', // Start of CDATA section, after the <.
			'[^\]]*+',    // Consume non-].
			'(?:',        // Unrolled loop: consume everything until ]]> is found.
			'](?!]>)',    // One ] that does not begin the end of the section.
			'[^\]]*+',    // Consume non-].
			')*+',        // Loop possessively.
			'(?:]]>)?',   // End of CDATA section. If not found, match all input.
		) );

		$pattern = implode( '', array(
			'/(',                // Capture the entire match.
			'<',                 // Find start of element.
			'(?(?=!--)',         // Is this a comment?
			$comment_tail,       // Find end of comment.
			'|',
			'(?(?=!\[CDATA\[)',  // Is this a CDATA section?
			$cdata_tail,         // Find end of CDATA section.
			'|',
			'[^>]*>?',           // Find end of element. If not found, match all input.
			')',
			')',
			')/s',
		) );
	}

	return preg_split( $pattern, $input, -1, PREG_SPLIT_DELIM_CAPTURE );
}
/** /**
* Replace characters or phrases within HTML elements only. * Replace characters or phrases within HTML elements only.
* *
@ -606,25 +656,7 @@ function wpautop( $pee, $br = true ) {
*/ */
function wp_replace_in_html_tags( $haystack, $replace_pairs ) { function wp_replace_in_html_tags( $haystack, $replace_pairs ) {
// Find all elements. // Find all elements.
$comments = $textarr = wp_html_split( $haystack );
'!' // Start of comment, after the <.
. '(?:' // Unroll the loop: Consume everything until --> is found.
. '-(?!->)' // Dash not followed by end of comment.
. '[^\-]*+' // Consume non-dashes.
. ')*+' // Loop possessively.
. '(?:-->)?'; // End of comment. If not found, match all input.
$regex =
'/(' // Capture the entire match.
. '<' // Find start of element.
. '(?(?=!--)' // Is this a comment?
. $comments // Find end of comment.
. '|'
. '[^>]*>?' // Find end of element. If not found, match all input.
. ')'
. ')/s';
$textarr = preg_split( $regex, $haystack, -1, PREG_SPLIT_DELIM_CAPTURE );
$changed = false; $changed = false;
// Optimize when searching for one item. // Optimize when searching for one item.

View File

@ -333,29 +333,10 @@ function do_shortcodes_in_html_tags( $content, $ignore_html ) {
$trans = array( '[' => '&#91;', ']' => '&#93;' ); $trans = array( '[' => '&#91;', ']' => '&#93;' );
$pattern = get_shortcode_regex(); $pattern = get_shortcode_regex();
$textarr = wp_html_split( $content );
$comment_regex =
'!' // Start of comment, after the <.
. '(?:' // Unroll the loop: Consume everything until --> is found.
. '-(?!->)' // Dash not followed by end of comment.
. '[^\-]*+' // Consume non-dashes.
. ')*+' // Loop possessively.
. '(?:-->)?'; // End of comment. If not found, match all input.
$regex =
'/(' // Capture the entire match.
. '<' // Find start of element.
. '(?(?=!--)' // Is this a comment?
. $comment_regex // Find end of comment.
. '|'
. '[^>]*>?' // Find end of element. If not found, match all input.
. ')'
. ')/s';
$textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
foreach ( $textarr as &$element ) { foreach ( $textarr as &$element ) {
if ( '<' !== $element[0] ) { if ( '' == $element || '<' !== $element[0] ) {
continue; continue;
} }
@ -370,7 +351,7 @@ function do_shortcodes_in_html_tags( $content, $ignore_html ) {
continue; continue;
} }
if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) { if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) || '<![CDATA[' === substr( $element, 0, 9 ) ) {
// Encode all [ and ] chars. // Encode all [ and ] chars.
$element = strtr( $element, $trans ); $element = strtr( $element, $trans );
continue; continue;

View File

@ -4,7 +4,7 @@
* *
* @global string $wp_version * @global string $wp_version
*/ */
$wp_version = '4.3-beta4-33468'; $wp_version = '4.3-beta4-33469';
/** /**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema. * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.