From 18bb886b225f82bfe94925290fb93b4dc5676774 Mon Sep 17 00:00:00 2001 From: Gary Pendergast Date: Sat, 28 Feb 2015 02:21:26 +0000 Subject: [PATCH] When sanitizing a URL to redirect to, UTF-8 characters can be URL encoded, instead of being removed. While RFC 3986 does not specify which character sets are allowed in URIs, Section 2.5 states that octets matching UTF-8 character encoding should be percent-encoded, then unreserved octets outside of the UTF-8 range should be percent-encoded. As browsers tend to only implement support for UTF-8 in URLs, this change only implements the UTF-8 encoding part. We may revisit the second part if it becomes an issue. Fixes #31486 Built from https://develop.svn.wordpress.org/trunk@31587 git-svn-id: http://core.svn.wordpress.org/trunk@31568 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/pluggable.php | 26 ++++++++++++++++++++++++++ wp-includes/version.php | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/wp-includes/pluggable.php b/wp-includes/pluggable.php index 4eae503d7e..71c0d3704b 100644 --- a/wp-includes/pluggable.php +++ b/wp-includes/pluggable.php @@ -1201,6 +1201,19 @@ if ( !function_exists('wp_sanitize_redirect') ) : * @return string redirect-sanitized URL **/ function wp_sanitize_redirect($location) { + $regex = '/ + ( + (?: [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx + | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 + | [\xE1-\xEC][\x80-\xBF]{2} + | \xED[\x80-\x9F][\x80-\xBF] + | [\xEE-\xEF][\x80-\xBF]{2} + | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 + | [\xF1-\xF3][\x80-\xBF]{3} + | \xF4[\x80-\x8F][\x80-\xBF]{2} + ){1,50} # ...one or more times + )/x'; + $location = preg_replace_callback( $regex, '_wp_sanitize_utf8_in_redirect', $location ); $location = preg_replace('|[^a-z0-9-~+_.?#=&;,/:%!*\[\]()]|i', '', $location); $location = wp_kses_no_null($location); @@ -1209,6 +1222,19 @@ function 
wp_sanitize_redirect($location) { $location = _deep_replace($strip, $location); return $location; } + +/** + * URL encode UTF-8 characters in a URL. + * + * @ignore + * @since 4.2.0 + * @access private + * + * @see wp_sanitize_redirect() + */ +function _wp_sanitize_utf8_in_redirect( $matches ) { + return urlencode( $matches[0] ); +} endif; if ( !function_exists('wp_safe_redirect') ) : diff --git a/wp-includes/version.php b/wp-includes/version.php index 1b3ccae8b7..dc9bfddda2 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -4,7 +4,7 @@ * * @global string $wp_version */ -$wp_version = '4.2-alpha-31586'; +$wp_version = '4.2-alpha-31587'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.