Normalize UTF-8 charset slug detection.
There are several existing places in Core that attempt to detect if a blog charset is UTF-8. Each place attempts to perform the same check, except the logic is spread throughout and there's no single method provided to make this determination in a consistent way. The `_canonical_charset()` method exists, but is marked for private use. In this patch the new `unicode` module provides `is_utf8_charset()` as a method taking an optional charset slug and indicating if it represents UTF-8, examining all of the allowable variants of that slug. Associated code is updated to use this new function, including `_canonical_charset()`. If no slug is provided, it will look up the current `get_option( 'blog_charset' )`. Finally, the test functions governing `_canonical_charset()` have been rewritten as a single test with a data provider instead of as separate test functions. Developed in https://github.com/WordPress/wordpress-develop/pull/6535 Discussed in https://core.trac.wordpress.org/ticket/61182 Fixes #61182. Props dmsnell, jonsurrell. Built from https://develop.svn.wordpress.org/trunk@58147 git-svn-id: http://core.svn.wordpress.org/trunk@57612 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
parent
79045fa10e
commit
d30cd41ed4
|
@ -64,7 +64,7 @@ require_once ABSPATH . 'wp-admin/admin-header.php';
|
||||||
<?php
|
<?php
|
||||||
settings_fields( 'reading' );
|
settings_fields( 'reading' );
|
||||||
|
|
||||||
if ( ! in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ), true ) ) {
|
if ( ! is_utf8_charset() ) {
|
||||||
add_settings_field( 'blog_charset', __( 'Encoding for pages and feeds' ), 'options_reading_blog_charset', 'reading', 'default', array( 'label_for' => 'blog_charset' ) );
|
add_settings_field( 'blog_charset', __( 'Encoding for pages and feeds' ), 'options_reading_blog_charset', 'reading', 'default', array( 'label_for' => 'blog_charset' ) );
|
||||||
}
|
}
|
||||||
?>
|
?>
|
||||||
|
|
|
@ -160,7 +160,7 @@ $allowed_options['privacy'] = array();
|
||||||
|
|
||||||
$mail_options = array( 'mailserver_url', 'mailserver_port', 'mailserver_login', 'mailserver_pass' );
|
$mail_options = array( 'mailserver_url', 'mailserver_port', 'mailserver_login', 'mailserver_pass' );
|
||||||
|
|
||||||
if ( ! in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ), true ) ) {
|
if ( ! is_utf8_charset() ) {
|
||||||
$allowed_options['reading'][] = 'blog_charset';
|
$allowed_options['reading'][] = 'blog_charset';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -91,7 +91,7 @@ function _mb_substr( $str, $start, $length = null, $encoding = null ) {
|
||||||
* The solution below works only for UTF-8, so in case of a different
|
* The solution below works only for UTF-8, so in case of a different
|
||||||
* charset just use built-in substr().
|
* charset just use built-in substr().
|
||||||
*/
|
*/
|
||||||
if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ), true ) ) {
|
if ( ! is_utf8_charset( $encoding ) ) {
|
||||||
return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
|
return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -176,7 +176,7 @@ function _mb_strlen( $str, $encoding = null ) {
|
||||||
* The solution below works only for UTF-8, so in case of a different charset
|
* The solution below works only for UTF-8, so in case of a different charset
|
||||||
* just use built-in strlen().
|
* just use built-in strlen().
|
||||||
*/
|
*/
|
||||||
if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ), true ) ) {
|
if ( ! is_utf8_charset( $encoding ) ) {
|
||||||
return strlen( $str );
|
return strlen( $str );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -960,19 +960,7 @@ function _wp_specialchars( $text, $quote_style = ENT_NOQUOTES, $charset = false,
|
||||||
$quote_style = ENT_QUOTES;
|
$quote_style = ENT_QUOTES;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store the site charset as a static to avoid multiple calls to wp_load_alloptions().
|
$charset = _canonical_charset( $charset ? $charset : get_option( 'blog_charset' ) );
|
||||||
if ( ! $charset ) {
|
|
||||||
static $_charset = null;
|
|
||||||
if ( ! isset( $_charset ) ) {
|
|
||||||
$alloptions = wp_load_alloptions();
|
|
||||||
$_charset = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : '';
|
|
||||||
}
|
|
||||||
$charset = $_charset;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ), true ) ) {
|
|
||||||
$charset = 'UTF-8';
|
|
||||||
}
|
|
||||||
|
|
||||||
$_quote_style = $quote_style;
|
$_quote_style = $quote_style;
|
||||||
|
|
||||||
|
@ -1114,7 +1102,7 @@ function wp_check_invalid_utf8( $text, $strip = false ) {
|
||||||
// Store the site charset as a static to avoid multiple calls to get_option().
|
// Store the site charset as a static to avoid multiple calls to get_option().
|
||||||
static $is_utf8 = null;
|
static $is_utf8 = null;
|
||||||
if ( ! isset( $is_utf8 ) ) {
|
if ( ! isset( $is_utf8 ) ) {
|
||||||
$is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ), true );
|
$is_utf8 = is_utf8_charset();
|
||||||
}
|
}
|
||||||
if ( ! $is_utf8 ) {
|
if ( ! $is_utf8 ) {
|
||||||
return $text;
|
return $text;
|
||||||
|
|
|
@ -7474,17 +7474,27 @@ function get_tag_regex( $tag ) {
|
||||||
*
|
*
|
||||||
* @see https://core.trac.wordpress.org/ticket/23688
|
* @see https://core.trac.wordpress.org/ticket/23688
|
||||||
*
|
*
|
||||||
* @param string $charset A charset name.
|
* @param string $charset A charset name, e.g. "UTF-8", "Windows-1252", "SJIS".
|
||||||
* @return string The canonical form of the charset.
|
* @return string The canonical form of the charset.
|
||||||
*/
|
*/
|
||||||
function _canonical_charset( $charset ) {
|
function _canonical_charset( $charset ) {
|
||||||
if ( 'utf-8' === strtolower( $charset ) || 'utf8' === strtolower( $charset ) ) {
|
if ( is_utf8_charset( $charset ) ) {
|
||||||
|
|
||||||
return 'UTF-8';
|
return 'UTF-8';
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( 'iso-8859-1' === strtolower( $charset ) || 'iso8859-1' === strtolower( $charset ) ) {
|
/*
|
||||||
|
* Normalize the ISO-8859-1 family of languages.
|
||||||
|
*
|
||||||
|
* This is not required for htmlspecialchars(), as it properly recognizes all of
|
||||||
|
* the input character sets that here are transformed into "ISO-8859-1".
|
||||||
|
*
|
||||||
|
* @todo Should this entire check be removed since it's not required for the stated purpose?
|
||||||
|
* @todo Should WordPress transform other potential charset equivalents, such as "latin1"?
|
||||||
|
*/
|
||||||
|
if (
|
||||||
|
( 0 === strcasecmp( 'iso-8859-1', $charset ) ) ||
|
||||||
|
( 0 === strcasecmp( 'iso8859-1', $charset ) )
|
||||||
|
) {
|
||||||
return 'ISO-8859-1';
|
return 'ISO-8859-1';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
*
|
*
|
||||||
* @global string $wp_version
|
* @global string $wp_version
|
||||||
*/
|
*/
|
||||||
$wp_version = '6.6-alpha-58146';
|
$wp_version = '6.6-alpha-58147';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
||||||
|
|
|
@ -106,6 +106,7 @@ if ( WP_CACHE && apply_filters( 'enable_loading_advanced_cache_dropin', true ) &
|
||||||
wp_set_lang_dir();
|
wp_set_lang_dir();
|
||||||
|
|
||||||
// Load early WordPress files.
|
// Load early WordPress files.
|
||||||
|
require ABSPATH . WPINC . '/unicode.php';
|
||||||
require ABSPATH . WPINC . '/class-wp-list-util.php';
|
require ABSPATH . WPINC . '/class-wp-list-util.php';
|
||||||
require ABSPATH . WPINC . '/formatting.php';
|
require ABSPATH . WPINC . '/formatting.php';
|
||||||
require ABSPATH . WPINC . '/meta.php';
|
require ABSPATH . WPINC . '/meta.php';
|
||||||
|
|
Loading…
Reference in New Issue