WPDB: When checking that a string can be sent to MySQL, we shouldn't use `mb_convert_encoding()`, as it behaves differently to MySQL's character encoding conversion.
Props mdawaffe, pento, nbachiyski, jorbin, johnjamesjacoby, jeremyfelt.
See #32165.
Built from https://develop.svn.wordpress.org/trunk@32364
git-svn-id: http://core.svn.wordpress.org/trunk@32335 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
parent
29216371bd
commit
364886a5be
|
@ -527,7 +527,7 @@ function upgrade_all() {
|
||||||
if ( $wp_current_db_version < 31351 )
|
if ( $wp_current_db_version < 31351 )
|
||||||
upgrade_420();
|
upgrade_420();
|
||||||
|
|
||||||
if ( $wp_current_db_version < 32308 )
|
if ( $wp_current_db_version < 32364 )
|
||||||
upgrade_430();
|
upgrade_430();
|
||||||
|
|
||||||
maybe_disable_link_manager();
|
maybe_disable_link_manager();
|
||||||
|
@ -1446,17 +1446,33 @@ function upgrade_420() {
|
||||||
function upgrade_430() {
|
function upgrade_430() {
|
||||||
global $wp_current_db_version, $wpdb;
|
global $wp_current_db_version, $wpdb;
|
||||||
|
|
||||||
if ( $wp_current_db_version < 32308 ) {
|
if ( $wp_current_db_version < 32364 ) {
|
||||||
$content_length = $wpdb->get_col_length( $wpdb->comments, 'comment_content' );
|
$content_length = $wpdb->get_col_length( $wpdb->comments, 'comment_content' );
|
||||||
if ( ! $content_length ) {
|
if ( false === $content_length ) {
|
||||||
$content_length = 65535;
|
$content_length = array(
|
||||||
|
'type' => 'byte',
|
||||||
|
'length' => 65535,
|
||||||
|
);
|
||||||
|
} elseif ( ! is_array( $content_length ) ) {
|
||||||
|
$length = (int) $content_length > 0 ? (int) $content_length : 65535;
|
||||||
|
$content_length = array(
|
||||||
|
'type' => 'byte',
|
||||||
|
'length' => $length
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( 'byte' !== $content_length['type'] ) {
|
||||||
|
// Sites with malformed DB schemas are on their own.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
$allowed_length = intval( $content_length['length'] ) - 10;
|
||||||
|
|
||||||
$comments = $wpdb->get_results(
|
$comments = $wpdb->get_results(
|
||||||
"SELECT comment_ID FROM $wpdb->comments
|
"SELECT `comment_ID` FROM `{$wpdb->comments}`
|
||||||
WHERE comment_date_gmt > '2015-04-26'
|
WHERE `comment_date_gmt` > '2015-04-26'
|
||||||
AND CHAR_LENGTH( comment_content ) >= $content_length
|
AND LENGTH( `comment_content` ) >= {$allowed_length}
|
||||||
AND ( comment_content LIKE '%<%' OR comment_content LIKE '%>%' )"
|
AND ( `comment_content` LIKE '%<%' OR `comment_content` LIKE '%>%' )"
|
||||||
);
|
);
|
||||||
|
|
||||||
foreach ( $comments as $comment ) {
|
foreach ( $comments as $comment ) {
|
||||||
|
|
|
@ -2118,17 +2118,7 @@ function wp_insert_comment( $commentdata ) {
|
||||||
|
|
||||||
$compacted = compact( 'comment_post_ID', 'comment_author', 'comment_author_email', 'comment_author_url', 'comment_author_IP', 'comment_date', 'comment_date_gmt', 'comment_content', 'comment_karma', 'comment_approved', 'comment_agent', 'comment_type', 'comment_parent', 'user_id' );
|
$compacted = compact( 'comment_post_ID', 'comment_author', 'comment_author_email', 'comment_author_url', 'comment_author_IP', 'comment_date', 'comment_date_gmt', 'comment_content', 'comment_karma', 'comment_approved', 'comment_agent', 'comment_type', 'comment_parent', 'user_id' );
|
||||||
if ( ! $wpdb->insert( $wpdb->comments, $compacted ) ) {
|
if ( ! $wpdb->insert( $wpdb->comments, $compacted ) ) {
|
||||||
$fields = array( 'comment_author', 'comment_author_email', 'comment_author_url', 'comment_content' );
|
return false;
|
||||||
|
|
||||||
foreach( $fields as $field ) {
|
|
||||||
if ( isset( $compacted[ $field ] ) ) {
|
|
||||||
$compacted[ $field ] = $wpdb->strip_invalid_text_for_column( $wpdb->comments, $field, $compacted[ $field ] );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( ! $wpdb->insert( $wpdb->comments, $compacted ) ) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$id = (int) $wpdb->insert_id;
|
$id = (int) $wpdb->insert_id;
|
||||||
|
@ -2252,6 +2242,8 @@ function wp_throttle_comment_flood($block, $time_lastcomment, $time_newcomment)
|
||||||
* @return int|bool The ID of the comment on success, false on failure.
|
* @return int|bool The ID of the comment on success, false on failure.
|
||||||
*/
|
*/
|
||||||
function wp_new_comment( $commentdata ) {
|
function wp_new_comment( $commentdata ) {
|
||||||
|
global $wpdb;
|
||||||
|
|
||||||
if ( isset( $commentdata['user_ID'] ) ) {
|
if ( isset( $commentdata['user_ID'] ) ) {
|
||||||
$commentdata['user_id'] = $commentdata['user_ID'] = (int) $commentdata['user_ID'];
|
$commentdata['user_id'] = $commentdata['user_ID'] = (int) $commentdata['user_ID'];
|
||||||
}
|
}
|
||||||
|
@ -2295,7 +2287,22 @@ function wp_new_comment( $commentdata ) {
|
||||||
|
|
||||||
$comment_ID = wp_insert_comment($commentdata);
|
$comment_ID = wp_insert_comment($commentdata);
|
||||||
if ( ! $comment_ID ) {
|
if ( ! $comment_ID ) {
|
||||||
return false;
|
$fields = array( 'comment_author', 'comment_author_email', 'comment_author_url', 'comment_content' );
|
||||||
|
|
||||||
|
foreach( $fields as $field ) {
|
||||||
|
if ( isset( $commentdata[ $field ] ) ) {
|
||||||
|
$commentdata[ $field ] = $wpdb->strip_invalid_text_for_column( $wpdb->comments, $field, $commentdata[ $field ] );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$commentdata = wp_filter_comment( $commentdata );
|
||||||
|
|
||||||
|
$commentdata['comment_approved'] = wp_allow_comment( $commentdata );
|
||||||
|
|
||||||
|
$comment_ID = wp_insert_comment( $commentdata );
|
||||||
|
if ( ! $comment_ID ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -13,23 +13,85 @@ if ( !function_exists('_') ) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether PCRE/u (PCRE_UTF8 modifier) is available for use.
|
||||||
|
*
|
||||||
|
* @ignore
|
||||||
|
* @since 4.2.2
|
||||||
|
* @access private
|
||||||
|
*
|
||||||
|
* @param null|bool|string $set - Used for testing only
|
||||||
|
* null : default - get PCRE/u capability
|
||||||
|
* false : Used for testing - return false for future calls to this function
|
||||||
|
* 'reset': Used for testing - restore default behavior of this function
|
||||||
|
*/
|
||||||
|
function _wp_can_use_pcre_u( $set = null ) {
|
||||||
|
static $utf8_pcre = 'reset';
|
||||||
|
|
||||||
|
if ( null !== $set ) {
|
||||||
|
$utf8_pcre = $set;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( 'reset' === $utf8_pcre ) {
|
||||||
|
$utf8_pcre = @preg_match( '/^./u', 'a' );
|
||||||
|
}
|
||||||
|
|
||||||
|
return $utf8_pcre;
|
||||||
|
}
|
||||||
|
|
||||||
if ( ! function_exists( 'mb_substr' ) ) :
|
if ( ! function_exists( 'mb_substr' ) ) :
|
||||||
function mb_substr( $str, $start, $length = null, $encoding = null ) {
|
function mb_substr( $str, $start, $length = null, $encoding = null ) {
|
||||||
return _mb_substr( $str, $start, $length, $encoding );
|
return _mb_substr( $str, $start, $length, $encoding );
|
||||||
}
|
}
|
||||||
endif;
|
endif;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit.
|
||||||
|
* For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
|
||||||
|
* The behavior of this function for invalid inputs is undefined.
|
||||||
|
*/
|
||||||
function _mb_substr( $str, $start, $length = null, $encoding = null ) {
|
function _mb_substr( $str, $start, $length = null, $encoding = null ) {
|
||||||
|
if ( null === $encoding ) {
|
||||||
|
$encoding = get_option( 'blog_charset' );
|
||||||
|
}
|
||||||
|
|
||||||
// The solution below works only for UTF-8,
|
// The solution below works only for UTF-8,
|
||||||
// so in case of a different charset just use built-in substr()
|
// so in case of a different charset just use built-in substr()
|
||||||
$charset = get_option( 'blog_charset' );
|
if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
|
||||||
if ( ! in_array( $charset, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
|
|
||||||
return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
|
return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
|
||||||
}
|
}
|
||||||
// Use the regex unicode support to separate the UTF-8 characters into an array
|
|
||||||
preg_match_all( '/./us', $str, $match );
|
if ( _wp_can_use_pcre_u() ) {
|
||||||
$chars = is_null( $length ) ? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length );
|
// Use the regex unicode support to separate the UTF-8 characters into an array
|
||||||
return implode( '', $chars );
|
preg_match_all( '/./us', $str, $match );
|
||||||
|
$chars = is_null( $length ) ? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length );
|
||||||
|
return implode( '', $chars );
|
||||||
|
}
|
||||||
|
|
||||||
|
$regex = '/(
|
||||||
|
[\x00-\x7F] # single-byte sequences 0xxxxxxx
|
||||||
|
| [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx
|
||||||
|
| \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2
|
||||||
|
| [\xE1-\xEC][\x80-\xBF]{2}
|
||||||
|
| \xED[\x80-\x9F][\x80-\xBF]
|
||||||
|
| [\xEE-\xEF][\x80-\xBF]{2}
|
||||||
|
| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
|
||||||
|
| [\xF1-\xF3][\x80-\xBF]{3}
|
||||||
|
| \xF4[\x80-\x8F][\x80-\xBF]{2}
|
||||||
|
)/x';
|
||||||
|
|
||||||
|
$chars = array( '' ); // Start with 1 element instead of 0 since the first thing we do is pop
|
||||||
|
do {
|
||||||
|
// We had some string left over from the last round, but we counted it in that last round.
|
||||||
|
array_pop( $chars );
|
||||||
|
|
||||||
|
// Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string)
|
||||||
|
$pieces = preg_split( $regex, $str, 1000, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
|
||||||
|
|
||||||
|
$chars = array_merge( $chars, $pieces );
|
||||||
|
} while ( count( $pieces ) > 1 && $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop.
|
||||||
|
|
||||||
|
return join( '', array_slice( $chars, $start, $length ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ! function_exists( 'mb_strlen' ) ) :
|
if ( ! function_exists( 'mb_strlen' ) ) :
|
||||||
|
@ -38,16 +100,54 @@ if ( ! function_exists( 'mb_strlen' ) ) :
|
||||||
}
|
}
|
||||||
endif;
|
endif;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit.
|
||||||
|
* For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
|
||||||
|
* The behavior of this function for invalid inputs is undefined.
|
||||||
|
*/
|
||||||
function _mb_strlen( $str, $encoding = null ) {
|
function _mb_strlen( $str, $encoding = null ) {
|
||||||
|
if ( null === $encoding ) {
|
||||||
|
$encoding = get_option( 'blog_charset' );
|
||||||
|
}
|
||||||
|
|
||||||
// The solution below works only for UTF-8,
|
// The solution below works only for UTF-8,
|
||||||
// so in case of a different charset just use built-in strlen()
|
// so in case of a different charset just use built-in strlen()
|
||||||
$charset = get_option( 'blog_charset' );
|
if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
|
||||||
if ( ! in_array( $charset, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
|
|
||||||
return strlen( $str );
|
return strlen( $str );
|
||||||
}
|
}
|
||||||
// Use the regex unicode support to separate the UTF-8 characters into an array
|
|
||||||
preg_match_all( '/./us', $str, $match );
|
if ( _wp_can_use_pcre_u() ) {
|
||||||
return count( $match[0] );
|
// Use the regex unicode support to separate the UTF-8 characters into an array
|
||||||
|
preg_match_all( '/./us', $str, $match );
|
||||||
|
return count( $match[0] );
|
||||||
|
}
|
||||||
|
|
||||||
|
$regex = '/(?:
|
||||||
|
[\x00-\x7F] # single-byte sequences 0xxxxxxx
|
||||||
|
| [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx
|
||||||
|
| \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2
|
||||||
|
| [\xE1-\xEC][\x80-\xBF]{2}
|
||||||
|
| \xED[\x80-\x9F][\x80-\xBF]
|
||||||
|
| [\xEE-\xEF][\x80-\xBF]{2}
|
||||||
|
| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
|
||||||
|
| [\xF1-\xF3][\x80-\xBF]{3}
|
||||||
|
| \xF4[\x80-\x8F][\x80-\xBF]{2}
|
||||||
|
)/x';
|
||||||
|
|
||||||
|
$count = 1; // Start at 1 instead of 0 since the first thing we do is decrement
|
||||||
|
do {
|
||||||
|
// We had some string left over from the last round, but we counted it in that last round.
|
||||||
|
$count--;
|
||||||
|
|
||||||
|
// Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string)
|
||||||
|
$pieces = preg_split( $regex, $str, 1000 );
|
||||||
|
|
||||||
|
// Increment
|
||||||
|
$count += count( $pieces );
|
||||||
|
} while ( $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop.
|
||||||
|
|
||||||
|
// Fencepost: preg_split() always returns one extra item in the array
|
||||||
|
return --$count;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( !function_exists('hash_hmac') ):
|
if ( !function_exists('hash_hmac') ):
|
||||||
|
|
|
@ -4,14 +4,14 @@
|
||||||
*
|
*
|
||||||
* @global string $wp_version
|
* @global string $wp_version
|
||||||
*/
|
*/
|
||||||
$wp_version = '4.3-alpha-32359';
|
$wp_version = '4.3-alpha-32364';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
||||||
*
|
*
|
||||||
* @global int $wp_db_version
|
* @global int $wp_db_version
|
||||||
*/
|
*/
|
||||||
$wp_db_version = 32308;
|
$wp_db_version = 32364;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds the TinyMCE version
|
* Holds the TinyMCE version
|
||||||
|
|
|
@ -1809,6 +1809,8 @@ class wpdb {
|
||||||
* @return int|false The number of rows affected, or false on error.
|
* @return int|false The number of rows affected, or false on error.
|
||||||
*/
|
*/
|
||||||
function _insert_replace_helper( $table, $data, $format = null, $type = 'INSERT' ) {
|
function _insert_replace_helper( $table, $data, $format = null, $type = 'INSERT' ) {
|
||||||
|
$this->insert_id = 0;
|
||||||
|
|
||||||
if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) {
|
if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -1829,7 +1831,6 @@ class wpdb {
|
||||||
|
|
||||||
$sql = "$type INTO `$table` ($fields) VALUES ($formats)";
|
$sql = "$type INTO `$table` ($fields) VALUES ($formats)";
|
||||||
|
|
||||||
$this->insert_id = 0;
|
|
||||||
$this->check_current_query = false;
|
$this->check_current_query = false;
|
||||||
return $this->query( $this->prepare( $sql, $values ) );
|
return $this->query( $this->prepare( $sql, $values ) );
|
||||||
}
|
}
|
||||||
|
@ -2021,17 +2022,11 @@ class wpdb {
|
||||||
// We can skip this field if we know it isn't a string.
|
// We can skip this field if we know it isn't a string.
|
||||||
// This checks %d/%f versus ! %s because its sprintf() could take more.
|
// This checks %d/%f versus ! %s because its sprintf() could take more.
|
||||||
$value['charset'] = false;
|
$value['charset'] = false;
|
||||||
} elseif ( $this->check_ascii( $value['value'] ) ) {
|
|
||||||
// If it's ASCII, then we don't need the charset. We can skip this field.
|
|
||||||
$value['charset'] = false;
|
|
||||||
} else {
|
} else {
|
||||||
$value['charset'] = $this->get_col_charset( $table, $field );
|
$value['charset'] = $this->get_col_charset( $table, $field );
|
||||||
if ( is_wp_error( $value['charset'] ) ) {
|
if ( is_wp_error( $value['charset'] ) ) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This isn't ASCII. Don't have strip_invalid_text() re-check.
|
|
||||||
$value['ascii'] = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$data[ $field ] = $value;
|
$data[ $field ] = $value;
|
||||||
|
@ -2064,10 +2059,6 @@ class wpdb {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( false !== $value['length'] && mb_strlen( $value['value'] ) > $value['length'] ) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
$data[ $field ] = $value;
|
$data[ $field ] = $value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2406,14 +2397,16 @@ class wpdb {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieve the maximum string length allowed in a given column.
|
* Retrieve the maximum string length allowed in a given column.
|
||||||
|
* The length may either be specified as a byte length or a character length.
|
||||||
*
|
*
|
||||||
* @since 4.2.1
|
* @since 4.2.1
|
||||||
* @access public
|
* @access public
|
||||||
*
|
*
|
||||||
* @param string $table Table name.
|
* @param string $table Table name.
|
||||||
* @param string $column Column name.
|
* @param string $column Column name.
|
||||||
* @return mixed Max column length as an int. False if the column has no
|
* @return mixed array( 'length' => (int), 'type' => 'byte' | 'char' )
|
||||||
* length. WP_Error object if there was an error.
|
* false if the column has no length (for example, numeric column)
|
||||||
|
* WP_Error object if there was an error.
|
||||||
*/
|
*/
|
||||||
public function get_col_length( $table, $column ) {
|
public function get_col_length( $table, $column ) {
|
||||||
$tablekey = strtolower( $table );
|
$tablekey = strtolower( $table );
|
||||||
|
@ -2446,27 +2439,47 @@ class wpdb {
|
||||||
}
|
}
|
||||||
|
|
||||||
switch( $type ) {
|
switch( $type ) {
|
||||||
case 'binary':
|
|
||||||
case 'char':
|
case 'char':
|
||||||
case 'varbinary':
|
|
||||||
case 'varchar':
|
case 'varchar':
|
||||||
return $length;
|
return array(
|
||||||
|
'type' => 'char',
|
||||||
|
'length' => (int) $length,
|
||||||
|
);
|
||||||
|
break;
|
||||||
|
case 'binary':
|
||||||
|
case 'varbinary':
|
||||||
|
return array(
|
||||||
|
'type' => 'byte',
|
||||||
|
'length' => (int) $length,
|
||||||
|
);
|
||||||
break;
|
break;
|
||||||
case 'tinyblob':
|
case 'tinyblob':
|
||||||
case 'tinytext':
|
case 'tinytext':
|
||||||
return 255; // 2^8 - 1
|
return array(
|
||||||
|
'type' => 'byte',
|
||||||
|
'length' => 255, // 2^8 - 1
|
||||||
|
);
|
||||||
break;
|
break;
|
||||||
case 'blob':
|
case 'blob':
|
||||||
case 'text':
|
case 'text':
|
||||||
return 65535; // 2^16 - 1
|
return array(
|
||||||
|
'type' => 'byte',
|
||||||
|
'length' => 65535, // 2^16 - 1
|
||||||
|
);
|
||||||
break;
|
break;
|
||||||
case 'mediumblob':
|
case 'mediumblob':
|
||||||
case 'mediumtext':
|
case 'mediumtext':
|
||||||
return 16777215; // 2^24 - 1
|
return array(
|
||||||
|
'type' => 'byte',
|
||||||
|
'length' => 16777215, // 2^24 - 1
|
||||||
|
);
|
||||||
break;
|
break;
|
||||||
case 'longblob':
|
case 'longblob':
|
||||||
case 'longtext':
|
case 'longtext':
|
||||||
return 4294967295; // 2^32 - 1
|
return array(
|
||||||
|
'type' => 'byte',
|
||||||
|
'length' => 4294967295, // 2^32 - 1
|
||||||
|
);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
|
@ -2572,50 +2585,55 @@ class wpdb {
|
||||||
* remove invalid characters, a WP_Error object is returned.
|
* remove invalid characters, a WP_Error object is returned.
|
||||||
*/
|
*/
|
||||||
protected function strip_invalid_text( $data ) {
|
protected function strip_invalid_text( $data ) {
|
||||||
// Some multibyte character sets that we can check in PHP.
|
|
||||||
$mb_charsets = array(
|
|
||||||
'ascii' => 'ASCII',
|
|
||||||
'big5' => 'BIG-5',
|
|
||||||
'eucjpms' => 'eucJP-win',
|
|
||||||
'gb2312' => 'EUC-CN',
|
|
||||||
'ujis' => 'EUC-JP',
|
|
||||||
'utf32' => 'UTF-32',
|
|
||||||
);
|
|
||||||
|
|
||||||
$supported_charsets = array();
|
|
||||||
if ( function_exists( 'mb_list_encodings' ) ) {
|
|
||||||
$supported_charsets = mb_list_encodings();
|
|
||||||
}
|
|
||||||
|
|
||||||
$db_check_string = false;
|
$db_check_string = false;
|
||||||
|
|
||||||
foreach ( $data as &$value ) {
|
foreach ( $data as &$value ) {
|
||||||
$charset = $value['charset'];
|
$charset = $value['charset'];
|
||||||
|
|
||||||
// Column isn't a string, or is latin1, which will happily store anything.
|
if ( is_array( $value['length'] ) ) {
|
||||||
if ( false === $charset || 'latin1' === $charset ) {
|
$length = $value['length']['length'];
|
||||||
|
} else {
|
||||||
|
$length = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// There's no charset to work with.
|
||||||
|
if ( false === $charset ) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Column isn't a string.
|
||||||
if ( ! is_string( $value['value'] ) ) {
|
if ( ! is_string( $value['value'] ) ) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ASCII is always OK.
|
$truncate_by_byte_length = 'byte' === $value['length']['type'];
|
||||||
if ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) {
|
|
||||||
continue;
|
$needs_validation = true;
|
||||||
|
if (
|
||||||
|
// latin1 can store any byte sequence
|
||||||
|
'latin1' === $charset
|
||||||
|
||
|
||||||
|
// ASCII is always OK.
|
||||||
|
( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) )
|
||||||
|
) {
|
||||||
|
$truncate_by_byte_length = true;
|
||||||
|
$needs_validation = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert the text locally.
|
if ( $truncate_by_byte_length ) {
|
||||||
if ( $supported_charsets ) {
|
mbstring_binary_safe_encoding();
|
||||||
if ( isset( $mb_charsets[ $charset ] ) && in_array( $mb_charsets[ $charset ], $supported_charsets ) ) {
|
if ( false !== $length && strlen( $value['value'] ) > $length ) {
|
||||||
$value['value'] = mb_convert_encoding( $value['value'], $mb_charsets[ $charset ], $mb_charsets[ $charset ] );
|
$value['value'] = substr( $value['value'], 0, $length );
|
||||||
|
}
|
||||||
|
reset_mbstring_encoding();
|
||||||
|
|
||||||
|
if ( ! $needs_validation ) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// utf8 can be handled by regex, which is a bunch faster than a DB lookup.
|
// utf8 can be handled by regex, which is a bunch faster than a DB lookup.
|
||||||
if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) {
|
if ( ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) && function_exists( 'mb_strlen' ) ) {
|
||||||
$regex = '/
|
$regex = '/
|
||||||
(
|
(
|
||||||
(?: [\x00-\x7F] # single-byte sequences 0xxxxxxx
|
(?: [\x00-\x7F] # single-byte sequences 0xxxxxxx
|
||||||
|
@ -2625,7 +2643,7 @@ class wpdb {
|
||||||
| \xED[\x80-\x9F][\x80-\xBF]
|
| \xED[\x80-\x9F][\x80-\xBF]
|
||||||
| [\xEE-\xEF][\x80-\xBF]{2}';
|
| [\xEE-\xEF][\x80-\xBF]{2}';
|
||||||
|
|
||||||
if ( 'utf8mb4' === $charset) {
|
if ( 'utf8mb4' === $charset ) {
|
||||||
$regex .= '
|
$regex .= '
|
||||||
| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
|
| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
|
||||||
| [\xF1-\xF3][\x80-\xBF]{3}
|
| [\xF1-\xF3][\x80-\xBF]{3}
|
||||||
|
@ -2638,6 +2656,11 @@ class wpdb {
|
||||||
| . # anything else
|
| . # anything else
|
||||||
/x';
|
/x';
|
||||||
$value['value'] = preg_replace( $regex, '$1', $value['value'] );
|
$value['value'] = preg_replace( $regex, '$1', $value['value'] );
|
||||||
|
|
||||||
|
|
||||||
|
if ( false !== $length && mb_strlen( $value['value'], 'UTF-8' ) > $length ) {
|
||||||
|
$value['value'] = mb_substr( $value['value'], 0, $length, 'UTF-8' );
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2654,8 +2677,14 @@ class wpdb {
|
||||||
$queries[ $value['charset'] ] = array();
|
$queries[ $value['charset'] ] = array();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Split the CONVERT() calls by charset, so we can make sure the connection is right
|
// We're going to need to truncate by characters or bytes, depending on the length value we have.
|
||||||
$queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( %s USING {$value['charset']} )", $value['value'] );
|
if ( 'byte' === $value['length']['type'] ) {
|
||||||
|
// Split the CONVERT() calls by charset, so we can make sure the connection is right
|
||||||
|
$queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( LEFT( CONVERT( %s USING binary ), %d ) USING {$value['charset']} )", $value['value'], $value['length']['length'] );
|
||||||
|
} else {
|
||||||
|
$queries[ $value['charset'] ][ $col ] = $this->prepare( "LEFT( CONVERT( %s USING {$value['charset']} ), %d )", $value['value'], $value['length']['length'] );
|
||||||
|
}
|
||||||
|
|
||||||
unset( $data[ $col ]['db'] );
|
unset( $data[ $col ]['db'] );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2674,16 +2703,19 @@ class wpdb {
|
||||||
|
|
||||||
$this->check_current_query = false;
|
$this->check_current_query = false;
|
||||||
|
|
||||||
$row = $this->get_row( "SELECT " . implode( ', ', $query ), ARRAY_N );
|
$sql = array();
|
||||||
|
foreach ( $query as $column => $column_query ) {
|
||||||
|
$sql[] = $column_query . " AS x_$column";
|
||||||
|
}
|
||||||
|
|
||||||
|
$row = $this->get_row( "SELECT " . implode( ', ', $sql ), ARRAY_A );
|
||||||
if ( ! $row ) {
|
if ( ! $row ) {
|
||||||
$this->set_charset( $this->dbh, $connection_charset );
|
$this->set_charset( $this->dbh, $connection_charset );
|
||||||
return new WP_Error( 'wpdb_strip_invalid_text_failure' );
|
return new WP_Error( 'wpdb_strip_invalid_text_failure' );
|
||||||
}
|
}
|
||||||
|
|
||||||
$cols = array_keys( $query );
|
foreach ( array_keys( $query ) as $column ) {
|
||||||
$col_count = count( $cols );
|
$data[ $column ]['value'] = $row["x_$column"];
|
||||||
for ( $ii = 0; $ii < $col_count; $ii++ ) {
|
|
||||||
$data[ $cols[ $ii ] ]['value'] = $row[ $ii ];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2725,6 +2757,7 @@ class wpdb {
|
||||||
'value' => $query,
|
'value' => $query,
|
||||||
'charset' => $charset,
|
'charset' => $charset,
|
||||||
'ascii' => false,
|
'ascii' => false,
|
||||||
|
'length' => false,
|
||||||
);
|
);
|
||||||
|
|
||||||
$data = $this->strip_invalid_text( array( $data ) );
|
$data = $this->strip_invalid_text( array( $data ) );
|
||||||
|
@ -2747,7 +2780,7 @@ class wpdb {
|
||||||
* @return string|WP_Error The converted string, or a WP_Error object if the conversion fails.
|
* @return string|WP_Error The converted string, or a WP_Error object if the conversion fails.
|
||||||
*/
|
*/
|
||||||
public function strip_invalid_text_for_column( $table, $column, $value ) {
|
public function strip_invalid_text_for_column( $table, $column, $value ) {
|
||||||
if ( ! is_string( $value ) || $this->check_ascii( $value ) ) {
|
if ( ! is_string( $value ) ) {
|
||||||
return $value;
|
return $value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2764,7 +2797,7 @@ class wpdb {
|
||||||
$column => array(
|
$column => array(
|
||||||
'value' => $value,
|
'value' => $value,
|
||||||
'charset' => $charset,
|
'charset' => $charset,
|
||||||
'ascii' => false,
|
'length' => $this->get_col_length( $table, $column ),
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue