From 7a52a3aac798efe7f9327311225682717f2d8f51 Mon Sep 17 00:00:00 2001 From: Peter Wilson Date: Tue, 4 Oct 2016 20:33:29 +0000 Subject: [PATCH] HTTP API: Simplify `wp_parse_url()` to ensure consistent results. [38694] revealed some URL formats were been parsed incorrectly, including those used by Google Fonts. This change simplifies the function to use placeholder values which cause PHP's parsing to behave consistently. Props jrf, peterwilsoncc. Fixes #36356. Built from https://develop.svn.wordpress.org/trunk@38726 git-svn-id: http://core.svn.wordpress.org/trunk@38669 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/http.php | 146 +++++++++++++++++++++++++--------------- wp-includes/version.php | 2 +- 2 files changed, 93 insertions(+), 55 deletions(-) diff --git a/wp-includes/http.php b/wp-includes/http.php index 3738b791cf..62638d5474 100644 --- a/wp-includes/http.php +++ b/wp-includes/http.php @@ -623,11 +623,16 @@ function ms_allowed_http_request_hosts( $is_external, $host ) { } /** - * A wrapper for PHP's parse_url() function that handles edgecases in < PHP 5.4.7 + * A wrapper for PHP's parse_url() function that handles consistency in the return + * values across PHP versions. * * PHP 5.4.7 expanded parse_url()'s ability to handle non-absolute url's, including - * schemeless and relative url's with :// in the path, this works around those - * limitations providing a standard output on PHP 5.2~5.4+. + * schemeless and relative url's with :// in the path. This function works around + * those limitations providing a standard output on PHP 5.2~5.4+. + * + * Secondly, across various PHP versions, schemeless URLs starting containing a ":" + * in the query are being handled inconsistently. This function works around those + * differences as well. * * Error suppression is used as prior to PHP 5.3.3, an E_WARNING would be generated * when URL parsing failed. @@ -640,63 +645,96 @@ function ms_allowed_http_request_hosts( $is_external, $host ) { * predefined constants to specify which one. * Defaults to -1 (= return all parts as an array). * @see http://php.net/manual/en/function.parse-url.php - * @return mixed False on failure; Array of URL components on success; - * When a specific component has been requested: null if the component doesn't - * exist in the given URL; a sting or - in the case of PHP_URL_PORT - integer - * when it does; See parse_url()'s return values. + * @return mixed False on parse failure; Array of URL components on success; + * When a specific component has been requested: null if the component + * doesn't exist in the given URL; a sting or - in the case of + * PHP_URL_PORT - integer when it does. See parse_url()'s return values. */ function wp_parse_url( $url, $component = -1 ) { - $parts = @parse_url( $url, $component ); + $to_unset = array(); + $url = strval( $url ); - if ( version_compare( PHP_VERSION, '5.4.7', '>=' ) ) { + if ( '//' === substr( $url, 0, 2 ) ) { + $to_unset[] = 'scheme'; + $url = 'placeholder:' . $url; + } elseif ( '/' === substr( $url, 0, 1 ) ) { + $to_unset[] = 'scheme'; + $to_unset[] = 'host'; + $url = 'placeholder://placeholder' . $url; + } + + $parts = @parse_url( $url ); + + if ( false === $parts ) { + // Parsing failure. return $parts; } - if ( false === $parts ) { - // < PHP 5.4.7 compat, trouble with relative paths including a scheme break in the path. - if ( '/' == $url[0] && false !== strpos( $url, '://' ) ) { - if ( in_array( $component, array( PHP_URL_SCHEME, PHP_URL_HOST ), true ) ) { - return null; - } - // Since we know it's a relative path, prefix with a scheme/host placeholder and try again. - if ( ! $parts = @parse_url( 'placeholder://placeholder' . $url, $component ) ) { - return $parts; - } - // Remove the placeholder values. - if ( -1 === $component ) { - unset( $parts['scheme'], $parts['host'] ); - } - } else { - return $parts; - } + // Remove the placeholder values. + foreach ( $to_unset as $key ) { + unset( $parts[ $key ] ); } - // < PHP 5.4.7 compat, doesn't detect a schemeless URL's host field. - if ( '//' == substr( $url, 0, 2 ) ) { - if ( -1 === $component && ! isset( $parts['host'] ) ) { - $path_parts = explode( '/', substr( $parts['path'], 2 ), 2 ); - $parts['host'] = $path_parts[0]; - if ( isset( $path_parts[1] ) ) { - $parts['path'] = '/' . $path_parts[1]; - } else { - unset( $parts['path'] ); - } - } elseif ( PHP_URL_HOST === $component || PHP_URL_PATH === $component ) { - $all_parts = @parse_url( $url ); - if ( ! isset( $all_parts['host'] ) ) { - $path_parts = explode( '/', substr( $all_parts['path'], 2 ), 2 ); - if ( PHP_URL_PATH === $component ) { - if ( isset( $path_parts[1] ) ) { - $parts = '/' . $path_parts[1]; - } else { - $parts = null; - } - } elseif ( PHP_URL_HOST === $component ) { - $parts = $path_parts[0]; - } - } - } - } - - return $parts; + return _get_component_from_parsed_url_array( $parts, $component ); +} + +/** + * Retrieve a specific component from a parsed URL array. + * + * @internal + * + * @since 4.7.0 + * + * @param array|false $url_parts The parsed URL. Can be false if the URL failed to parse. + * @param int $component The specific component to retrieve. Use one of the PHP + * predefined constants to specify which one. + * Defaults to -1 (= return all parts as an array). + * @see http://php.net/manual/en/function.parse-url.php + * @return mixed False on parse failure; Array of URL components on success; + * When a specific component has been requested: null if the component + * doesn't exist in the given URL; a sting or - in the case of + * PHP_URL_PORT - integer when it does. See parse_url()'s return values. + */ +function _get_component_from_parsed_url_array( $url_parts, $component = -1 ) { + if ( -1 === $component ) { + return $url_parts; + } + + $key = _wp_translate_php_url_constant_to_key( $component ); + if ( false !== $key && is_array( $url_parts ) && isset( $url_parts[ $key ] ) ) { + return $url_parts[ $key ]; + } else { + return null; + } +} + +/** + * Translate a PHP_URL_* constant to the named array keys PHP uses. + * + * @internal + * + * @since 4.7.0 + * + * @see http://php.net/manual/en/url.constants.php + * + * @param int $constant PHP_URL_* constant. + * @return string|bool The named key or false. + */ +function _wp_translate_php_url_constant_to_key( $constant ) { + $translation = array( + PHP_URL_SCHEME => 'scheme', + PHP_URL_HOST => 'host', + PHP_URL_PORT => 'port', + PHP_URL_USER => 'user', + PHP_URL_PASS => 'pass', + PHP_URL_PATH => 'path', + PHP_URL_QUERY => 'query', + PHP_URL_FRAGMENT => 'fragment', + ); + + if ( isset( $translation[ $constant ] ) ) { + return $translation[ $constant ]; + } else { + return false; + } } diff --git a/wp-includes/version.php b/wp-includes/version.php index e36e8308dc..17f0087678 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -4,7 +4,7 @@ * * @global string $wp_version */ -$wp_version = '4.7-alpha-38725'; +$wp_version = '4.7-alpha-38726'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.