Split the Windows-1252 numeric-entity translation out of convert_chars()
into a new convert_invalid_entities(), and register the new function on the
four input filters listed in default-filters.php.

After this patch convert_chars() only escapes lone ampersands; it no longer
strips <title>/<category> tags or rewrites <br>/<hr>.

NOTE: the numeric character references below (&#038;, &#38;, &#1 and
&#128; .. &#159; with their replacements) are literal ASCII text in the PHP
source. They must not be entity-decoded when this patch is transported,
otherwise the strtr() map degenerates into key === value pairs.

diff --git a/wp-includes/default-filters.php b/wp-includes/default-filters.php
index 93fac65c7d..8d926e698e 100644
--- a/wp-includes/default-filters.php
+++ b/wp-includes/default-filters.php
@@ -87,6 +87,7 @@ add_filter( 'post_mime_type', 'sanitize_mime_type' );
 
 // Places to balance tags on input
 foreach ( array( 'content_save_pre', 'excerpt_save_pre', 'comment_save_pre', 'pre_comment_content' ) as $filter ) {
+	add_filter( $filter, 'convert_invalid_entities' );
 	add_filter( $filter, 'balanceTags', 50 );
 }
 
diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php
index e619e2c0d6..1b1d4b1b62 100644
--- a/wp-includes/formatting.php
+++ b/wp-includes/formatting.php
@@ -1503,11 +1503,7 @@ function sanitize_html_class( $class, $fallback = '' ) {
 }
 
 /**
- * Converts a number of characters from a string.
- *
- * Metadata tags `<title>` and `<category>` are removed, `<br>` and `<hr>` are
- * converted into correct XHTML and Unicode characters are converted to the
- * valid range.
+ * Converts lone & characters into `&#038;` (a.k.a. `&amp;`)
  *
  * @since 0.71
  *
@@ -1516,58 +1512,64 @@ function sanitize_html_class( $class, $fallback = '' ) {
  * @return string Converted string.
  */
 function convert_chars( $content, $deprecated = '' ) {
-	if ( !empty( $deprecated ) )
+	if ( ! empty( $deprecated ) ) {
 		_deprecated_argument( __FUNCTION__, '0.71' );
+	}
 
-	// Translation of invalid Unicode references range to valid range
+	if ( strpos( $content, '&' ) !== false ) {
+		$content = preg_replace( '/&([^#])(?![a-z1-4]{1,8};)/i', '&#038;$1', $content );
+	}
+
+	return $content;
+}
+
+/**
+ * Converts invalid Unicode references range to valid range.
+ *
+ * @since 4.3
+ *
+ * @param string $content String with entities that need converting.
+ * @return string Converted string.
+ */
+function convert_invalid_entities( $content ) {
 	$wp_htmltranswinuni = array(
-	'&#128;' => '&#8364;', // the Euro sign
-	'&#129;' => '',
-	'&#130;' => '&#8218;', // these are Windows CP1252 specific characters
-	'&#131;' => '&#402;',  // they would look weird on non-Windows browsers
-	'&#132;' => '&#8222;',
-	'&#133;' => '&#8230;',
-	'&#134;' => '&#8224;',
-	'&#135;' => '&#8225;',
-	'&#136;' => '&#710;',
-	'&#137;' => '&#8240;',
-	'&#138;' => '&#352;',
-	'&#139;' => '&#8249;',
-	'&#140;' => '&#338;',
-	'&#141;' => '',
-	'&#142;' => '&#381;',
-	'&#143;' => '',
-	'&#144;' => '',
-	'&#145;' => '&#8216;',
-	'&#146;' => '&#8217;',
-	'&#147;' => '&#8220;',
-	'&#148;' => '&#8221;',
-	'&#149;' => '&#8226;',
-	'&#150;' => '&#8211;',
-	'&#151;' => '&#8212;',
-	'&#152;' => '&#732;',
-	'&#153;' => '&#8482;',
-	'&#154;' => '&#353;',
-	'&#155;' => '&#8250;',
-	'&#156;' => '&#339;',
-	'&#157;' => '',
-	'&#158;' => '&#382;',
-	'&#159;' => '&#376;'
+		'&#128;' => '&#8364;', // the Euro sign
+		'&#129;' => '',
+		'&#130;' => '&#8218;', // these are Windows CP1252 specific characters
+		'&#131;' => '&#402;',  // they would look weird on non-Windows browsers
+		'&#132;' => '&#8222;',
+		'&#133;' => '&#8230;',
+		'&#134;' => '&#8224;',
+		'&#135;' => '&#8225;',
+		'&#136;' => '&#710;',
+		'&#137;' => '&#8240;',
+		'&#138;' => '&#352;',
+		'&#139;' => '&#8249;',
+		'&#140;' => '&#338;',
+		'&#141;' => '',
+		'&#142;' => '&#381;',
+		'&#143;' => '',
+		'&#144;' => '',
+		'&#145;' => '&#8216;',
+		'&#146;' => '&#8217;',
+		'&#147;' => '&#8220;',
+		'&#148;' => '&#8221;',
+		'&#149;' => '&#8226;',
+		'&#150;' => '&#8211;',
+		'&#151;' => '&#8212;',
+		'&#152;' => '&#732;',
+		'&#153;' => '&#8482;',
+		'&#154;' => '&#353;',
+		'&#155;' => '&#8250;',
+		'&#156;' => '&#339;',
+		'&#157;' => '',
+		'&#158;' => '&#382;',
+		'&#159;' => '&#376;'
 	);
 
-	// Remove metadata tags
-	$content = preg_replace('/<title>(.+?)<\/title>/','',$content);
-	$content = preg_replace('/<category>(.+?)<\/category>/','',$content);
-
-	// Converts lone & characters into &#38; (a.k.a. &amp;)
-	$content = preg_replace('/&([^#])(?![a-z1-4]{1,8};)/i', '&#038;$1', $content);
-
-	// Fix Word pasting
-	$content = strtr($content, $wp_htmltranswinuni);
-
-	// Just a little XHTML help
-	$content = str_replace('<br>', '<br />', $content);
-	$content = str_replace('<hr>', '<hr />', $content);
+	if ( strpos( $content, '&#1' ) !== false ) {
+		$content = strtr( $content, $wp_htmltranswinuni );
+	}
 
 	return $content;
 }
diff --git a/wp-includes/version.php b/wp-includes/version.php
index b721c1de20..bc65289180 100644
--- a/wp-includes/version.php
+++ b/wp-includes/version.php
@@ -4,7 +4,7 @@
  *
  * @global string $wp_version
  */
-$wp_version = '4.3-alpha-32895';
+$wp_version = '4.3-alpha-32896';
 
 /**
  * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.