Database: Split the logic of `wpdb::init_charset()` into a separate method.

The logic for determining the appropriate character set and collation to use is becoming more complex, particularly with the recent additions of [37522] and [37523]. As `init_charset()` has side effects, and makes use of constants instead of parameters, it's not possible to unit test this logic.

This commit splits the logic part of `init_charset()` out into a new method, `wpdb::determine_charset()`, along with appropriate unit tests.

See #32105, #37522.

Fixes #36917.


Built from https://develop.svn.wordpress.org/trunk@37601


git-svn-id: http://core.svn.wordpress.org/trunk@37569 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Gary Pendergast 2016-06-01 02:38:29 +00:00
parent 8dbceabc60
commit 69147eb345
2 changed files with 36 additions and 15 deletions

View File

@ -4,7 +4,7 @@
*
* @global string $wp_version
*/
$wp_version = '4.6-alpha-37600';
$wp_version = '4.6-alpha-37601';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.

View File

@ -735,41 +735,62 @@ class wpdb {
*/
public function init_charset() {
// NOTE(review): this listing appears to show removed and added diff lines
// together without +/- markers: each `$this->charset` / `$this->collate`
// assignment below is immediately followed by its local-variable counterpart.
if ( function_exists('is_multisite') && is_multisite() ) {
// Multisite starts from utf8, using DB_COLLATE when it is defined and
// non-empty, otherwise utf8_general_ci.
$this->charset = 'utf8';
$charset = 'utf8';
if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) {
$this->collate = DB_COLLATE;
$collate = DB_COLLATE;
} else {
$this->collate = 'utf8_general_ci';
$collate = 'utf8_general_ci';
}
} elseif ( defined( 'DB_COLLATE' ) ) {
// Outside multisite, DB_COLLATE is taken as-is (even if empty).
$this->collate = DB_COLLATE;
$collate = DB_COLLATE;
}
// DB_CHARSET, when defined, overrides any charset chosen above.
if ( defined( 'DB_CHARSET' ) ) {
$this->charset = DB_CHARSET;
$charset = DB_CHARSET;
}
// NOTE(review): when the multisite branch is not taken and DB_COLLATE /
// DB_CHARSET are not defined, $collate / $charset are never assigned before
// this call - confirm whether they should be initialised (e.g. to '') first.
$charset_collate = $this->determine_charset( $charset, $collate );
// Store the resolved pair on the instance.
$this->charset = $charset_collate['charset'];
$this->collate = $charset_collate['collate'];
}
/**
* Given a charset and collation, determine the best charset and collation to use.
*
* For example, when able, utf8mb4 should be used instead of utf8.
*
* When no usable database handle is set on the instance, the values passed in
* are returned unchanged.
*
* @since 4.6.0
*
* @param string $charset The character set to check.
* @param string $collate The collation to check.
*
* @return array The most appropriate character set and collation to use, keyed
*               by 'charset' and 'collate'.
*/
public function determine_charset( $charset, $collate ) {
// NOTE(review): this listing appears to show removed and added diff lines
// together without +/- markers; the `$this->charset` / `$this->collate`
// statements and the bare `return;` look like the pre-change versions of the
// `$charset` / `$collate` lines that follow them.

// Bail out when there is no handle to query capabilities on: either the handle
// is empty, or mysqli is in use but the handle is not a mysqli instance.
if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) ) || empty( $this->dbh ) ) {
return;
return compact( 'charset', 'collate' );
}
// Upgrade utf8 to utf8mb4 when has_cap() reports the 'utf8mb4' capability.
if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) {
$this->charset = 'utf8mb4';
if ( 'utf8' === $charset && $this->has_cap( 'utf8mb4' ) ) {
$charset = 'utf8mb4';
}
// With utf8mb4 selected, the collation has to be a utf8mb4_* one as well.
if ( 'utf8mb4' === $this->charset ) {
if ( 'utf8mb4' === $charset ) {
// _general_ is outdated, so we can upgrade it to _unicode_, instead.
if ( ! $this->collate || 'utf8_general_ci' === $this->collate ) {
$this->collate = 'utf8mb4_unicode_ci';
if ( ! $collate || 'utf8_general_ci' === $collate ) {
$collate = 'utf8mb4_unicode_ci';
} else {
// Any other collation keeps its name, with the 'utf8_' prefix swapped for 'utf8mb4_'.
$this->collate = str_replace( 'utf8_', 'utf8mb4_', $this->collate );
$collate = str_replace( 'utf8_', 'utf8mb4_', $collate );
}
}
// _unicode_520_ is a better collation, we should use that when it's available.
if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $this->collate ) {
$this->collate = 'utf8mb4_unicode_520_ci';
if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $collate ) {
$collate = 'utf8mb4_unicode_520_ci';
}
// Hand back both values as an array with 'charset' and 'collate' keys.
return compact( 'charset', 'collate' );
}
/**