General: Provide _is_utf8_charset() in compat.php for early use

#61182 introduced is_utf8_charset() as a way of standardizing checks for charset slugs referring to UTF-8. This is called by _mb_strlen() inside of compat.php, but is_utf8_charset() is defined in functions.php, which isn't loaded early on. Code calling mb_strlen() early on before functions.php loads in hosts without the multibyte extension therefore may crash.

Reviewed by hellofromTonya.
Merges [58763] to the 6.6 branch.

Props dmsnell, jonsurrell, joemcgill, jorbin.
Fixes #61680.

Built from https://develop.svn.wordpress.org/branches/6.6@58764


git-svn-id: http://core.svn.wordpress.org/branches/6.6@58166 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Aaron Jorbin 2024-07-18 18:22:16 +00:00
parent 89c4da675f
commit 40b07bd32d
3 changed files with 47 additions and 19 deletions

View File

@ -40,6 +40,43 @@ function _wp_can_use_pcre_u( $set = null ) {
return $utf8_pcre;
}
/**
* Indicates if a given slug for a character set represents the UTF-8 text encoding.
*
* A charset is considered to represent UTF-8 if it is a case-insensitive match
* of "UTF-8" with or without the hyphen.
*
* Example:
*
* true === _is_utf8_charset( 'UTF-8' );
* true === _is_utf8_charset( 'utf8' );
* false === _is_utf8_charset( 'latin1' );
* false === _is_utf8_charset( 'UTF 8' );
*
* // Only strings match.
* false === _is_utf8_charset( [ 'charset' => 'utf-8' ] );
*
* `is_utf8_charset` should be used outside of this file.
*
* @ignore
* @since 6.6.1
*
* @param string $charset_slug Slug representing a text character encoding, or "charset".
* E.g. "UTF-8", "Windows-1252", "ISO-8859-1", "SJIS".
*
* @return bool Whether the slug represents the UTF-8 encoding.
*/
function _is_utf8_charset( $charset_slug ) {
if ( ! is_string( $charset_slug ) ) {
return false;
}
return (
0 === strcasecmp( 'UTF-8', $charset_slug ) ||
0 === strcasecmp( 'UTF8', $charset_slug )
);
}
if ( ! function_exists( 'mb_substr' ) ) :
/**
* Compat function to mimic mb_substr().
@ -91,7 +128,7 @@ function _mb_substr( $str, $start, $length = null, $encoding = null ) {
* The solution below works only for UTF-8, so in case of a different
* charset just use built-in substr().
*/
if ( ! is_utf8_charset( $encoding ) ) {
if ( ! _is_utf8_charset( $encoding ) ) {
return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
}
@ -176,7 +213,7 @@ function _mb_strlen( $str, $encoding = null ) {
* The solution below works only for UTF-8, so in case of a different charset
* just use built-in strlen().
*/
if ( ! is_utf8_charset( $encoding ) ) {
if ( ! _is_utf8_charset( $encoding ) ) {
return strlen( $str );
}

View File

@ -7496,26 +7496,17 @@ function get_tag_regex( $tag ) {
* $is_utf8 = is_utf8_charset();
*
* @since 6.6.0
* @since 6.6.1 A wrapper for _is_utf8_charset
*
* @param ?string $blog_charset Slug representing a text character encoding, or "charset".
* E.g. "UTF-8", "Windows-1252", "ISO-8859-1", "SJIS".
* @see _is_utf8_charset
*
* @param string|null $blog_charset Optional. Slug representing a text character encoding, or "charset".
* E.g. "UTF-8", "Windows-1252", "ISO-8859-1", "SJIS".
* Default value is to infer from "blog_charset" option.
* @return bool Whether the slug represents the UTF-8 encoding.
*/
function is_utf8_charset( $blog_charset = null ) {
$charset_to_examine = $blog_charset ?? get_option( 'blog_charset' );
/*
* Only valid string values count: the absence of a charset
* does not imply any charset, let alone UTF-8.
*/
if ( ! is_string( $charset_to_examine ) ) {
return false;
}
return (
0 === strcasecmp( 'UTF-8', $charset_to_examine ) ||
0 === strcasecmp( 'UTF8', $charset_to_examine )
);
return _is_utf8_charset( $blog_charset ?? get_option( 'blog_charset' ) );
}
/**

View File

@ -16,7 +16,7 @@
*
* @global string $wp_version
*/
$wp_version = '6.6.1-alpha-58762';
$wp_version = '6.6.1-alpha-58764';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.