When sanitizing a URL to redirect to, UTF-8 characters can be URL encoded, instead of being removed.

While RFC 3986 does not specify which character sets are allowed in URIs, Section 2.5 states that octects matching UTF-8 character encoding should be percent-encoded, then unreserved octets outside of the UTF-8 range should be percent-encoded. As browsers tend to only implement support for UTF-8 in URLs, this change only implements the UTF-8 encoding part. We may revisit the second part if it becomes an issue.

Fixes #31486


Built from https://develop.svn.wordpress.org/trunk@31587


git-svn-id: http://core.svn.wordpress.org/trunk@31568 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Gary Pendergast 2015-02-28 02:21:26 +00:00
parent 5aae51144d
commit 18bb886b22
2 changed files with 27 additions and 1 deletions

View File

@ -1201,6 +1201,19 @@ if ( !function_exists('wp_sanitize_redirect') ) :
* @return string redirect-sanitized URL * @return string redirect-sanitized URL
**/ **/
function wp_sanitize_redirect($location) { function wp_sanitize_redirect($location) {
$regex = '/
(
(?: [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx
| \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2
| [\xE1-\xEC][\x80-\xBF]{2}
| \xED[\x80-\x9F][\x80-\xBF]
| [\xEE-\xEF][\x80-\xBF]{2}
| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
| [\xF1-\xF3][\x80-\xBF]{3}
| \xF4[\x80-\x8F][\x80-\xBF]{2}
){1,50} # ...one or more times
)/x';
$location = preg_replace_callback( $regex, '_wp_sanitize_utf8_in_redirect', $location );
$location = preg_replace('|[^a-z0-9-~+_.?#=&;,/:%!*\[\]()]|i', '', $location); $location = preg_replace('|[^a-z0-9-~+_.?#=&;,/:%!*\[\]()]|i', '', $location);
$location = wp_kses_no_null($location); $location = wp_kses_no_null($location);
@ -1209,6 +1222,19 @@ function wp_sanitize_redirect($location) {
$location = _deep_replace($strip, $location); $location = _deep_replace($strip, $location);
return $location; return $location;
} }
/**
* URL encode UTF-8 characters in a URL.
*
* @ignore
* @since 4.2.0
* @access private
*
* @see wp_sanitize_redirect()
*/
function _wp_sanitize_utf8_in_redirect( $matches ) {
return urlencode( $matches[0] );
}
endif; endif;
if ( !function_exists('wp_safe_redirect') ) : if ( !function_exists('wp_safe_redirect') ) :

View File

@ -4,7 +4,7 @@
* *
* @global string $wp_version * @global string $wp_version
*/ */
$wp_version = '4.2-alpha-31586'; $wp_version = '4.2-alpha-31587';
/** /**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema. * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.