From 5f37506e7a2223d216eb3f13beb0dc77e7d027ff Mon Sep 17 00:00:00 2001 From: Oleg Kalnichevski Date: Fri, 15 Feb 2019 16:36:15 +0100 Subject: [PATCH 1/8] HTTPCLIENT-1968: added utility methods to parse and format URI path segments (ported from HttpCore master) --- .../http/client/utils/URLEncodedUtils.java | 155 ++++++++++++++---- .../client/utils/TestURLEncodedUtils.java | 39 +++++ 2 files changed, 162 insertions(+), 32 deletions(-) diff --git a/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java b/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java index 11b462a6e..56888090a 100644 --- a/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java +++ b/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java @@ -36,7 +36,9 @@ import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.util.ArrayList; +import java.util.Arrays; import java.util.BitSet; +import java.util.Collections; import java.util.List; import java.util.Scanner; @@ -68,6 +70,12 @@ public class URLEncodedUtils { private static final char QP_SEP_A = '&'; private static final char QP_SEP_S = ';'; private static final String NAME_VALUE_SEPARATOR = "="; + private static final char PATH_SEPARATOR = '/'; + + private static final BitSet PATH_SEPARATORS = new BitSet(256); + static { + PATH_SEPARATORS.set(PATH_SEPARATOR); + } /** * @deprecated 4.5 Use {@link #parse(URI, Charset)} @@ -78,19 +86,12 @@ public class URLEncodedUtils { } /** - * Returns a list of {@link NameValuePair NameValuePairs} as built from the URI's query portion. For example, a URI - * of {@code http://example.org/path/to/file?a=1&b=2&c=3} would return a list of three NameValuePairs, one for a=1, - * one for b=2, and one for c=3. By convention, {@code '&'} and {@code ';'} are accepted as parameter separators. - *

- * This is typically useful while parsing an HTTP PUT. + * Returns a list of {@link NameValuePair}s URI query parameters. + * By convention, {@code '&'} and {@code ';'} are accepted as parameter separators. * - * This API is currently only used for testing. - * - * @param uri - * URI to parse - * @param charset - * Charset to use while parsing the query - * @return a list of {@link NameValuePair} as built from the URI's query portion. + * @param uri input URI. + * @param charset parameter charset. + * @return list of query parameters. * * @since 4.5 */ @@ -230,14 +231,12 @@ public class URLEncodedUtils { } /** - * Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using the given character - * encoding. By convention, {@code '&'} and {@code ';'} are accepted as parameter separators. + * Returns a list of {@link NameValuePair}s URI query parameters. + * By convention, {@code '&'} and {@code ';'} are accepted as parameter separators. * - * @param s - * text to parse. - * @param charset - * Encoding to use when decoding the parameters. - * @return a list of {@link NameValuePair} as built from the URI's query portion. + * @param s URI query component. + * @param charset charset to use when decoding the parameters. + * @return list of query parameters. * * @since 4.2 */ @@ -254,13 +253,10 @@ public class URLEncodedUtils { * Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using the given character * encoding. * - * @param s - * text to parse. - * @param charset - * Encoding to use when decoding the parameters. - * @param separators - * element separators. - * @return a list of {@link NameValuePair} as built from the URI's query portion. + * @param s input text. + * @param charset parameter charset. + * @param separators parameter separators. + * @return list of query parameters. * * @since 4.3 */ @@ -274,8 +270,7 @@ public class URLEncodedUtils { } /** - * Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using - * the given character encoding. + * Returns a list of {@link NameValuePair}s parameters. * * @param buf * text to parse. @@ -321,6 +316,98 @@ public class URLEncodedUtils { return list; } + static List splitSegments(final CharSequence s, final BitSet separators) { + final ParserCursor cursor = new ParserCursor(0, s.length()); + // Skip leading separator + if (cursor.atEnd()) { + return Collections.emptyList(); + } + if (separators.get(s.charAt(cursor.getPos()))) { + cursor.updatePos(cursor.getPos() + 1); + } + final List list = new ArrayList(); + final StringBuilder buf = new StringBuilder(); + for (;;) { + if (cursor.atEnd()) { + list.add(buf.toString()); + break; + } + final char current = s.charAt(cursor.getPos()); + if (separators.get(current)) { + list.add(buf.toString()); + buf.setLength(0); + } else { + buf.append(current); + } + cursor.updatePos(cursor.getPos() + 1); + } + return list; + } + + static List splitPathSegments(final CharSequence s) { + return splitSegments(s, PATH_SEPARATORS); + } + + /** + * Returns a list of URI path segments. + * + * @param s URI path component. + * @param charset parameter charset. + * @return list of segments. + * + * @since 4.5 + */ + public static List parsePathSegments(final CharSequence s, final Charset charset) { + Args.notNull(s, "Char sequence"); + final List list = splitPathSegments(s); + for (int i = 0; i < list.size(); i++) { + list.set(i, urlDecode(list.get(i), charset != null ? charset : Consts.UTF_8, false)); + } + return list; + } + + /** + * Returns a list of URI path segments. + * + * @param s URI path component. + * @return list of segments. + * + * @since 4.5 + */ + public static List parsePathSegments(final CharSequence s) { + return parsePathSegments(s, Consts.UTF_8); + } + + /** + * Returns a string consisting of joint encoded path segments. + * + * @param segments the segments. + * @param charset parameter charset. + * @return URI path component + * + * @since 4.5 + */ + public static String formatSegments(final Iterable segments, final Charset charset) { + Args.notNull(segments, "Segments"); + final StringBuilder result = new StringBuilder(); + for (final String segment : segments) { + result.append(PATH_SEPARATOR).append(urlEncode(segment, charset, PATHSAFE, false)); + } + return result.toString(); + } + + /** + * Returns a string consisting of joint encoded path segments. + * + * @param segments the segments. + * @return URI path component + * + * @since 4.5 + */ + public static String formatSegments(final String... segments) { + return formatSegments(Arrays.asList(segments), Consts.UTF_8); + } + /** * Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded} * list of parameters in an HTTP PUT or HTTP POST. @@ -454,6 +541,8 @@ public class URLEncodedUtils { */ private static final BitSet URLENCODER = new BitSet(256); + private static final BitSet PATH_SPECIAL = new BitSet(256); + static { // unreserved chars // alpha characters @@ -491,9 +580,8 @@ public class URLEncodedUtils { // URL path safe PATHSAFE.or(UNRESERVED); - PATHSAFE.set('/'); // segment separator PATHSAFE.set(';'); // param separator - PATHSAFE.set(':'); // rest as per list in 2396, i.e. : @ & = + $ , + PATHSAFE.set(':'); // RFC 2396 PATHSAFE.set('@'); PATHSAFE.set('&'); PATHSAFE.set('='); @@ -501,6 +589,9 @@ public class URLEncodedUtils { PATHSAFE.set('$'); PATHSAFE.set(','); + PATH_SPECIAL.or(PATHSAFE); + PATH_SPECIAL.set('/'); + RESERVED.set(';'); RESERVED.set('/'); RESERVED.set('?'); @@ -683,7 +774,7 @@ public class URLEncodedUtils { } /** - * Encode a String using the {@link #PATHSAFE} set of characters. + * Encode a String using the {@link #PATH_SPECIAL} set of characters. *

* Used by URIBuilder to encode path segments. * @@ -692,7 +783,7 @@ public class URLEncodedUtils { * @return the encoded string */ static String encPath(final String content, final Charset charset) { - return urlEncode(content, charset, PATHSAFE, false); + return urlEncode(content, charset, PATH_SPECIAL, false); } } diff --git a/httpclient/src/test/java/org/apache/http/client/utils/TestURLEncodedUtils.java b/httpclient/src/test/java/org/apache/http/client/utils/TestURLEncodedUtils.java index 89dfc6dbb..dc4c4970e 100644 --- a/httpclient/src/test/java/org/apache/http/client/utils/TestURLEncodedUtils.java +++ b/httpclient/src/test/java/org/apache/http/client/utils/TestURLEncodedUtils.java @@ -29,6 +29,8 @@ package org.apache.http.client.utils; import java.net.URI; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.List; import org.apache.http.Consts; @@ -37,6 +39,7 @@ import org.apache.http.entity.ContentType; import org.apache.http.entity.StringEntity; import org.apache.http.message.BasicNameValuePair; import org.apache.http.protocol.HTTP; +import org.hamcrest.CoreMatchers; import org.junit.Assert; import org.junit.Test; @@ -102,6 +105,42 @@ public class TestURLEncodedUtils { assertNameValuePair(result.get(1), "d", "e"); } + @Test + public void testParseSegments() throws Exception { + Assert.assertThat(URLEncodedUtils.parsePathSegments("/this/that"), + CoreMatchers.equalTo(Arrays.asList("this", "that"))); + Assert.assertThat(URLEncodedUtils.parsePathSegments("this/that"), + CoreMatchers.equalTo(Arrays.asList("this", "that"))); + Assert.assertThat(URLEncodedUtils.parsePathSegments("this//that"), + CoreMatchers.equalTo(Arrays.asList("this", "", "that"))); + Assert.assertThat(URLEncodedUtils.parsePathSegments("this//that/"), + CoreMatchers.equalTo(Arrays.asList("this", "", "that", ""))); + Assert.assertThat(URLEncodedUtils.parsePathSegments("this//that/%2fthis%20and%20that"), + CoreMatchers.equalTo(Arrays.asList("this", "", "that", "/this and that"))); + Assert.assertThat(URLEncodedUtils.parsePathSegments("this///that//"), + CoreMatchers.equalTo(Arrays.asList("this", "", "", "that", "", ""))); + Assert.assertThat(URLEncodedUtils.parsePathSegments("/"), + CoreMatchers.equalTo(Collections.singletonList(""))); + Assert.assertThat(URLEncodedUtils.parsePathSegments(""), + CoreMatchers.equalTo(Collections.emptyList())); + } + + @Test + public void testFormatSegments() throws Exception { + Assert.assertThat(URLEncodedUtils.formatSegments("this", "that"), + CoreMatchers.equalTo("/this/that")); + Assert.assertThat(URLEncodedUtils.formatSegments("this", "", "that"), + CoreMatchers.equalTo("/this//that")); + Assert.assertThat(URLEncodedUtils.formatSegments("this", "", "that", "/this and that"), + CoreMatchers.equalTo("/this//that/%2Fthis%20and%20that")); + Assert.assertThat(URLEncodedUtils.formatSegments("this", "", "", "that", "", ""), + CoreMatchers.equalTo("/this///that//")); + Assert.assertThat(URLEncodedUtils.formatSegments(""), + CoreMatchers.equalTo("/")); + Assert.assertThat(URLEncodedUtils.formatSegments(), + CoreMatchers.equalTo("")); + } + @Test public void testParseURLCodedContentString() throws Exception { List result; From 030dbffe17c4dfc7908ab70b8aa96d56fa7f5cb7 Mon Sep 17 00:00:00 2001 From: Oleg Kalnichevski Date: Fri, 15 Feb 2019 22:59:40 +0100 Subject: [PATCH 2/8] Improved cache key generation (ported from HttpCore master) --- .../impl/client/cache/CacheKeyGenerator.java | 88 ++++++++++++------- 1 file changed, 56 insertions(+), 32 deletions(-) diff --git a/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java b/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java index 70df8cd04..08e76e40a 100644 --- a/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java +++ b/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java @@ -27,9 +27,8 @@ package org.apache.http.impl.client.cache; import java.io.UnsupportedEncodingException; -import java.net.MalformedURLException; import java.net.URI; -import java.net.URL; +import java.net.URISyntaxException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Collections; @@ -44,7 +43,10 @@ import org.apache.http.annotation.Contract; import org.apache.http.annotation.ThreadingBehavior; import org.apache.http.client.cache.HeaderConstants; import org.apache.http.client.cache.HttpCacheEntry; +import org.apache.http.client.methods.HttpUriRequest; +import org.apache.http.client.utils.URIBuilder; import org.apache.http.client.utils.URIUtils; +import org.apache.http.util.Args; /** * @since 4.1 @@ -54,6 +56,50 @@ class CacheKeyGenerator { private static final URI BASE_URI = URI.create("http://example.com/"); + static URIBuilder getRequestUriBuilder(final HttpRequest request) throws URISyntaxException { + if (request instanceof HttpUriRequest) { + final URI uri = ((HttpUriRequest) request).getURI(); + if (uri != null) { + return new URIBuilder(uri); + } + } + return new URIBuilder(request.getRequestLine().getUri()); + } + + static URI getRequestUri(final HttpRequest request, final HttpHost target) throws URISyntaxException { + Args.notNull(request, "HTTP request"); + Args.notNull(target, "Target"); + final URIBuilder uriBuilder = getRequestUriBuilder(request); + if (!uriBuilder.isAbsolute()) { + uriBuilder.setScheme(target.getSchemeName()); + uriBuilder.setHost(target.getHostName()); + uriBuilder.setPort(target.getPort()); + } + return uriBuilder.build(); + } + + static URI normalize(final URI requestUri) throws URISyntaxException { + Args.notNull(requestUri, "URI"); + final URIBuilder builder = new URIBuilder(requestUri.isAbsolute() ? URIUtils.resolve(BASE_URI, requestUri) : requestUri) ; + if (builder.getHost() != null) { + if (builder.getScheme() == null) { + builder.setScheme("http"); + } + if (builder.getPort() <= -1) { + if ("http".equalsIgnoreCase(builder.getScheme())) { + builder.setPort(80); + } else if ("https".equalsIgnoreCase(builder.getScheme())) { + builder.setPort(443); + } + } + } + builder.setFragment(null); + if (builder.getPath() == null) { + builder.setPath("/"); + } + return builder.build(); + } + /** * For a given {@link HttpHost} and {@link HttpRequest} get a URI from the * pair that I can use as an identifier KEY into my HttpCache @@ -63,45 +109,23 @@ class CacheKeyGenerator { * @return String the extracted URI */ public String getURI(final HttpHost host, final HttpRequest req) { - if (isRelativeRequest(req)) { - return canonicalizeUri(String.format("%s%s", host.toString(), req.getRequestLine().getUri())); + try { + final URI uri = normalize(getRequestUri(req, host)); + return uri.toASCIIString(); + } catch (final URISyntaxException ex) { + return req.getRequestLine().getUri(); } - return canonicalizeUri(req.getRequestLine().getUri()); } public String canonicalizeUri(final String uri) { try { - final URI normalized = URIUtils.resolve(BASE_URI, uri); - final URL u = new URL(normalized.toASCIIString()); - final String protocol = u.getProtocol(); - final String hostname = u.getHost(); - final int port = canonicalizePort(u.getPort(), protocol); - final String path = u.getPath(); - final String query = u.getQuery(); - final String file = (query != null) ? (path + "?" + query) : path; - final URL out = new URL(protocol, hostname, port, file); - return out.toString(); - } catch (final IllegalArgumentException e) { - return uri; - } catch (final MalformedURLException e) { + final URI normalized = normalize(URIUtils.resolve(BASE_URI, uri)); + return normalized.toASCIIString(); + } catch (final URISyntaxException ex) { return uri; } } - private int canonicalizePort(final int port, final String protocol) { - if (port == -1 && "http".equalsIgnoreCase(protocol)) { - return 80; - } else if (port == -1 && "https".equalsIgnoreCase(protocol)) { - return 443; - } - return port; - } - - private boolean isRelativeRequest(final HttpRequest req) { - final String requestUri = req.getRequestLine().getUri(); - return ("*".equals(requestUri) || requestUri.startsWith("/")); - } - protected String getFullHeaderValue(final Header[] headers) { if (headers == null) { return ""; From 4a976e104ea11436340b8794605391eea7edf78f Mon Sep 17 00:00:00 2001 From: Oleg Kalnichevski Date: Fri, 15 Feb 2019 18:36:45 +0100 Subject: [PATCH 3/8] HTTPCLIENT-1968: URIBuilder to split path component into path segments when digesting a URI (ported from HttpCore master) --- .../apache/http/client/utils/URIBuilder.java | 86 ++++++++++++++++--- .../http/client/utils/TestURIUtils.java | 2 +- 2 files changed, 75 insertions(+), 13 deletions(-) diff --git a/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java b/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java index 322ee239d..bc7b2cb12 100644 --- a/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java +++ b/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java @@ -30,6 +30,8 @@ import java.net.URI; import java.net.URISyntaxException; import java.nio.charset.Charset; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -53,8 +55,8 @@ public class URIBuilder { private String encodedUserInfo; private String host; private int port; - private String path; private String encodedPath; + private List pathSegments; private String encodedQuery; private List queryParams; private String query; @@ -112,6 +114,13 @@ public class URIBuilder { return null; } + private List parsePath(final String path, final Charset charset) { + if (path != null && !path.isEmpty()) { + return URLEncodedUtils.parsePathSegments(path, charset); + } + return null; + } + /** * Builds a {@link URI} instance. */ @@ -147,8 +156,8 @@ public class URIBuilder { } if (this.encodedPath != null) { sb.append(normalizePath(this.encodedPath, sb.length() == 0)); - } else if (this.path != null) { - sb.append(encodePath(normalizePath(this.path, sb.length() == 0))); + } else if (this.pathSegments != null) { + sb.append(encodePath(this.pathSegments)); } if (this.encodedQuery != null) { sb.append("?").append(this.encodedQuery); @@ -186,7 +195,7 @@ public class URIBuilder { this.encodedUserInfo = uri.getRawUserInfo(); this.userInfo = uri.getUserInfo(); this.encodedPath = uri.getRawPath(); - this.path = uri.getPath(); + this.pathSegments = parsePath(uri.getRawPath(), this.charset != null ? this.charset : Consts.UTF_8); this.encodedQuery = uri.getRawQuery(); this.queryParams = parseQuery(uri.getRawQuery(), this.charset != null ? this.charset : Consts.UTF_8); this.encodedFragment = uri.getRawFragment(); @@ -197,8 +206,8 @@ public class URIBuilder { return URLEncodedUtils.encUserInfo(userInfo, this.charset != null ? this.charset : Consts.UTF_8); } - private String encodePath(final String path) { - return URLEncodedUtils.encPath(path, this.charset != null ? this.charset : Consts.UTF_8); + private String encodePath(final List pathSegments) { + return URLEncodedUtils.formatSegments(pathSegments, this.charset != null ? this.charset : Consts.UTF_8); } private String encodeUrlForm(final List params) { @@ -259,9 +268,36 @@ public class URIBuilder { /** * Sets URI path. The value is expected to be unescaped and may contain non ASCII characters. + * + * @return this. */ public URIBuilder setPath(final String path) { - this.path = path; + return setPathSegments(path != null ? URLEncodedUtils.splitPathSegments(path) : null); + } + + /** + * Sets URI path. The value is expected to be unescaped and may contain non ASCII characters. + * + * @return this. + * + * @since 4.5.8 + */ + public URIBuilder setPathSegments(final String... pathSegments) { + this.pathSegments = pathSegments.length > 0 ? Arrays.asList(pathSegments) : null; + this.encodedSchemeSpecificPart = null; + this.encodedPath = null; + return this; + } + + /** + * Sets URI path. The value is expected to be unescaped and may contain non ASCII characters. + * + * @return this. + * + * @since 4.5.8 + */ + public URIBuilder setPathSegments(final List pathSegments) { + this.pathSegments = pathSegments != null && pathSegments.size() > 0 ? new ArrayList(pathSegments) : null; this.encodedSchemeSpecificPart = null; this.encodedPath = null; return this; @@ -462,7 +498,7 @@ public class URIBuilder { * @since 4.3 */ public boolean isOpaque() { - return this.path == null; + return isPathEmpty(); } public String getScheme() { @@ -481,14 +517,40 @@ public class URIBuilder { return this.port; } + /** + * @since 4.5.8 + */ + public boolean isPathEmpty() { + return (this.pathSegments == null || this.pathSegments.isEmpty()) && this.encodedPath == null; + } + + /** + * @since 4.5.8 + */ + public List getPathSegments() { + return this.pathSegments != null ? new ArrayList(this.pathSegments) : Collections.emptyList(); + } + public String getPath() { - return this.path; + if (this.pathSegments == null) { + return null; + } + final StringBuilder result = new StringBuilder(); + for (final String segment : this.pathSegments) { + result.append('/').append(segment); + } + return result.toString(); + } + + /** + * @since 4.5.8 + */ + public boolean isQueryEmpty() { + return (this.queryParams == null || this.queryParams.isEmpty()) && this.encodedQuery == null; } public List getQueryParams() { - return this.queryParams != null - ? new ArrayList(this.queryParams) - : new ArrayList(); + return this.queryParams != null ? new ArrayList(this.queryParams) : Collections.emptyList(); } public String getFragment() { diff --git a/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java b/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java index e446b2ce4..bf9e82874 100644 --- a/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java +++ b/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java @@ -151,7 +151,7 @@ public class TestURIUtils { Assert.assertEquals("http://a/b/c/g", URIUtils.resolve(this.baseURI, "./g").toString()); Assert.assertEquals("http://a/b/c/g/", URIUtils.resolve(this.baseURI, "g/").toString()); Assert.assertEquals("http://a/g", URIUtils.resolve(this.baseURI, "/g").toString()); - Assert.assertEquals("http://g/", URIUtils.resolve(this.baseURI, "//g").toString()); + Assert.assertEquals("http://g", URIUtils.resolve(this.baseURI, "//g").toString()); Assert.assertEquals("http://a/b/c/d;p?y", URIUtils.resolve(this.baseURI, "?y").toString()); Assert.assertEquals("http://a/b/c/d;p?y#f", URIUtils.resolve(this.baseURI, "?y#f") .toString()); From 4a463ebf4ec0167f9bf8770e217ad645b1e0b2db Mon Sep 17 00:00:00 2001 From: Oleg Kalnichevski Date: Sat, 16 Feb 2019 01:17:11 +0100 Subject: [PATCH 4/8] URI normalization code to use URLEncodedUtils#parsePathSegments method to split path segments --- .../impl/client/cache/CacheKeyGenerator.java | 3 -- .../apache/http/client/utils/URIUtils.java | 36 ++++++++----------- .../http/client/utils/TestURIUtils.java | 2 +- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java b/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java index 08e76e40a..93809ee2c 100644 --- a/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java +++ b/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java @@ -94,9 +94,6 @@ class CacheKeyGenerator { } } builder.setFragment(null); - if (builder.getPath() == null) { - builder.setPath("/"); - } return builder.build(); } diff --git a/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java b/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java index 6a8195029..89c1f7dbd 100644 --- a/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java +++ b/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java @@ -362,31 +362,23 @@ public class URIUtils { } Args.check(uri.isAbsolute(), "Base URI must be absolute"); final URIBuilder builder = new URIBuilder(uri); - final String path = builder.getPath(); - if (path != null && !path.equals("/")) { - final String[] inputSegments = path.split("/"); - final Stack outputSegments = new Stack(); - for (final String inputSegment : inputSegments) { - if ((inputSegment.isEmpty()) || (".".equals(inputSegment))) { - // Do nothing - } else if ("..".equals(inputSegment)) { - if (!outputSegments.isEmpty()) { - outputSegments.pop(); - } - } else { - outputSegments.push(inputSegment); + final List inputSegments = URLEncodedUtils.parsePathSegments(uri.getPath()); + final Stack outputSegments = new Stack(); + for (final String inputSegment : inputSegments) { + if (".".equals(inputSegment)) { + // Do nothing + } else if ("..".equals(inputSegment)) { + if (!outputSegments.isEmpty()) { + outputSegments.pop(); } + } else { + outputSegments.push(inputSegment); } - final StringBuilder outputBuffer = new StringBuilder(); - for (final String outputSegment : outputSegments) { - outputBuffer.append('/').append(outputSegment); - } - if (path.lastIndexOf('/') == path.length() - 1) { - // path.endsWith("/") || path.equals("") - outputBuffer.append('/'); - } - builder.setPath(outputBuffer.toString()); } + if (outputSegments.size() == 0) { + outputSegments.add(""); + } + builder.setPathSegments(outputSegments); if (builder.getScheme() != null) { builder.setScheme(builder.getScheme().toLowerCase(Locale.ROOT)); } diff --git a/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java b/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java index bf9e82874..e446b2ce4 100644 --- a/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java +++ b/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java @@ -151,7 +151,7 @@ public class TestURIUtils { Assert.assertEquals("http://a/b/c/g", URIUtils.resolve(this.baseURI, "./g").toString()); Assert.assertEquals("http://a/b/c/g/", URIUtils.resolve(this.baseURI, "g/").toString()); Assert.assertEquals("http://a/g", URIUtils.resolve(this.baseURI, "/g").toString()); - Assert.assertEquals("http://g", URIUtils.resolve(this.baseURI, "//g").toString()); + Assert.assertEquals("http://g/", URIUtils.resolve(this.baseURI, "//g").toString()); Assert.assertEquals("http://a/b/c/d;p?y", URIUtils.resolve(this.baseURI, "?y").toString()); Assert.assertEquals("http://a/b/c/d;p?y#f", URIUtils.resolve(this.baseURI, "?y#f") .toString()); From be77dc917ff8adc813277154c142e1c4930980ae Mon Sep 17 00:00:00 2001 From: Oleg Kalnichevski Date: Sat, 16 Feb 2019 01:35:45 +0100 Subject: [PATCH 5/8] DefaultRedirectStrategy to use URIUtils#normalizeSyntax method to normalize redirect location URI --- .../java/org/apache/http/client/utils/URIUtils.java | 5 +++-- .../http/impl/client/DefaultRedirectStrategy.java | 11 ++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java b/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java index 89c1f7dbd..ccb4d18a1 100644 --- a/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java +++ b/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java @@ -354,13 +354,14 @@ public class URIUtils { * * @param uri the original URI * @return the URI without dot segments + * + * @since 4.5 */ - static URI normalizeSyntax(final URI uri) throws URISyntaxException { + public static URI normalizeSyntax(final URI uri) throws URISyntaxException { if (uri.isOpaque() || uri.getAuthority() == null) { // opaque and file: URIs return uri; } - Args.check(uri.isAbsolute(), "Base URI must be absolute"); final URIBuilder builder = new URIBuilder(uri); final List inputSegments = URLEncodedUtils.parsePathSegments(uri.getPath()); final Stack outputSegments = new Stack(); diff --git a/httpclient/src/main/java/org/apache/http/impl/client/DefaultRedirectStrategy.java b/httpclient/src/main/java/org/apache/http/impl/client/DefaultRedirectStrategy.java index 9417aaa66..fbca9293c 100644 --- a/httpclient/src/main/java/org/apache/http/impl/client/DefaultRedirectStrategy.java +++ b/httpclient/src/main/java/org/apache/http/impl/client/DefaultRedirectStrategy.java @@ -146,13 +146,14 @@ public class DefaultRedirectStrategy implements RedirectStrategy { final RequestConfig config = clientContext.getRequestConfig(); URI uri = createLocationURI(location); - if (config.isNormalizeUri()) { - uri = uri.normalize(); - } - // rfc2616 demands the location value be a complete URI - // Location = "Location" ":" absoluteURI try { + if (config.isNormalizeUri()) { + uri = URIUtils.normalizeSyntax(uri); + } + + // rfc2616 demands the location value be a complete URI + // Location = "Location" ":" absoluteURI if (!uri.isAbsolute()) { if (!config.isRelativeRedirectsAllowed()) { throw new ProtocolException("Relative redirect location '" From 2004eef66851796291403daeb864f9c0c88529a4 Mon Sep 17 00:00:00 2001 From: Oleg Kalnichevski Date: Sat, 16 Feb 2019 13:20:46 +0100 Subject: [PATCH 6/8] HTTPCLIENT-1968: Preserve escaped PATHSAFE characters when normalizing URI path segments --- .../impl/client/cache/CacheKeyGenerator.java | 8 +++++ .../apache/http/client/utils/URIUtils.java | 33 +++++++++---------- .../impl/client/DefaultRedirectStrategy.java | 14 +------- .../http/client/utils/TestURIUtils.java | 2 +- .../client/TestDefaultRedirectStrategy.java | 7 ---- 5 files changed, 26 insertions(+), 38 deletions(-) diff --git a/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java b/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java index 93809ee2c..2c9d79234 100644 --- a/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java +++ b/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java @@ -46,6 +46,7 @@ import org.apache.http.client.cache.HttpCacheEntry; import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.client.utils.URIBuilder; import org.apache.http.client.utils.URIUtils; +import org.apache.http.client.utils.URLEncodedUtils; import org.apache.http.util.Args; /** @@ -70,6 +71,13 @@ class CacheKeyGenerator { Args.notNull(request, "HTTP request"); Args.notNull(target, "Target"); final URIBuilder uriBuilder = getRequestUriBuilder(request); + + // Decode path segments to preserve original behavior for backward compatibility + final String path = uriBuilder.getPath(); + if (path != null) { + uriBuilder.setPathSegments(URLEncodedUtils.parsePathSegments(path)); + } + if (!uriBuilder.isAbsolute()) { uriBuilder.setScheme(target.getSchemeName()); uriBuilder.setHost(target.getHostName()); diff --git a/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java b/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java index ccb4d18a1..1b303fad4 100644 --- a/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java +++ b/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java @@ -28,7 +28,9 @@ package org.apache.http.client.utils; import java.net.URI; import java.net.URISyntaxException; +import java.util.ArrayList; import java.util.EnumSet; +import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Stack; @@ -214,24 +216,18 @@ public class URIUtils { if (flags.contains(UriFlag.DROP_FRAGMENT)) { uribuilder.setFragment(null); } - final String path = uribuilder.getPath(); - if (TextUtils.isEmpty(path)) { - uribuilder.setPath("/"); - } else { - if (flags.contains(UriFlag.NORMALIZE)) { - final StringBuilder buf = new StringBuilder(path.length()); - boolean foundSlash = false; - for (int i = 0; i < path.length(); i++) { - final char ch = path.charAt(i); - if (ch != '/' || !foundSlash) { - buf.append(ch); - } - foundSlash = ch == '/'; + if (flags.contains(UriFlag.NORMALIZE)) { + final List pathSegments = new ArrayList(uribuilder.getPathSegments()); + for (final Iterator it = pathSegments.iterator(); it.hasNext(); ) { + final String pathSegment = it.next(); + if (pathSegment.isEmpty() && it.hasNext()) { + it.remove(); } - uribuilder.setPath(buf.toString()); - } else { - uribuilder.setPath(path); } + uribuilder.setPathSegments(pathSegments); + } + if (uribuilder.isPathEmpty()) { + uribuilder.setPathSegments(""); } return uribuilder.build(); } @@ -267,6 +263,9 @@ public class URIUtils { if (uribuilder.getUserInfo() != null) { uribuilder.setUserInfo(null); } + if (uribuilder.getPathSegments().isEmpty()) { + uribuilder.setPathSegments(""); + } if (TextUtils.isEmpty(uribuilder.getPath())) { uribuilder.setPath("/"); } @@ -363,7 +362,7 @@ public class URIUtils { return uri; } final URIBuilder builder = new URIBuilder(uri); - final List inputSegments = URLEncodedUtils.parsePathSegments(uri.getPath()); + final List inputSegments = builder.getPathSegments(); final Stack outputSegments = new Stack(); for (final String inputSegment : inputSegments) { if (".".equals(inputSegment)) { diff --git a/httpclient/src/main/java/org/apache/http/impl/client/DefaultRedirectStrategy.java b/httpclient/src/main/java/org/apache/http/impl/client/DefaultRedirectStrategy.java index fbca9293c..94f259aa0 100644 --- a/httpclient/src/main/java/org/apache/http/impl/client/DefaultRedirectStrategy.java +++ b/httpclient/src/main/java/org/apache/http/impl/client/DefaultRedirectStrategy.java @@ -29,7 +29,6 @@ package org.apache.http.impl.client; import java.net.URI; import java.net.URISyntaxException; -import java.util.Locale; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -49,12 +48,10 @@ import org.apache.http.client.methods.HttpHead; import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.client.methods.RequestBuilder; import org.apache.http.client.protocol.HttpClientContext; -import org.apache.http.client.utils.URIBuilder; import org.apache.http.client.utils.URIUtils; import org.apache.http.protocol.HttpContext; import org.apache.http.util.Args; import org.apache.http.util.Asserts; -import org.apache.http.util.TextUtils; /** * Default implementation of {@link RedirectStrategy}. This strategy honors the restrictions @@ -191,16 +188,7 @@ public class DefaultRedirectStrategy implements RedirectStrategy { */ protected URI createLocationURI(final String location) throws ProtocolException { try { - final URIBuilder b = new URIBuilder(new URI(location)); - final String host = b.getHost(); - if (host != null) { - b.setHost(host.toLowerCase(Locale.ROOT)); - } - final String path = b.getPath(); - if (TextUtils.isEmpty(path)) { - b.setPath("/"); - } - return b.build(); + return new URI(location); } catch (final URISyntaxException ex) { throw new ProtocolException("Invalid redirect URI: " + location, ex); } diff --git a/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java b/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java index e446b2ce4..1e2b96e18 100644 --- a/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java +++ b/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java @@ -137,7 +137,7 @@ public class TestURIUtils { Assert.assertEquals("example://a/b/c/%7Bfoo%7D", URIUtils.resolve(this.baseURI, "eXAMPLE://a/./b/../b/%63/%7bfoo%7d").toString()); Assert.assertEquals("http://www.example.com/%3C", URIUtils.resolve(this.baseURI, "http://www.example.com/%3c").toString()); Assert.assertEquals("http://www.example.com/", URIUtils.resolve(this.baseURI, "HTTP://www.EXAMPLE.com/").toString()); - Assert.assertEquals("http://www.example.com/a/", URIUtils.resolve(this.baseURI, "http://www.example.com/a%2f").toString()); + Assert.assertEquals("http://www.example.com/a%2F", URIUtils.resolve(this.baseURI, "http://www.example.com/a%2f").toString()); Assert.assertEquals("http://www.example.com/?q=%26", URIUtils.resolve(this.baseURI, "http://www.example.com/?q=%26").toString()); Assert.assertEquals("http://www.example.com/%23?q=%26", URIUtils.resolve(this.baseURI, "http://www.example.com/%23?q=%26").toString()); Assert.assertEquals("http://www.example.com/blah-(%20-blah-%20&%20-blah-%20)-blah/", diff --git a/httpclient/src/test/java/org/apache/http/impl/client/TestDefaultRedirectStrategy.java b/httpclient/src/test/java/org/apache/http/impl/client/TestDefaultRedirectStrategy.java index b957bc9fa..78e09b5cc 100644 --- a/httpclient/src/test/java/org/apache/http/impl/client/TestDefaultRedirectStrategy.java +++ b/httpclient/src/test/java/org/apache/http/impl/client/TestDefaultRedirectStrategy.java @@ -385,13 +385,6 @@ public class TestDefaultRedirectStrategy { Assert.assertSame(entity, ((HttpEntityEnclosingRequest) redirect2).getEntity()); } - @Test - public void testCreateLocationURI() throws Exception { - final DefaultRedirectStrategy redirectStrategy = new DefaultRedirectStrategy(); - Assert.assertEquals("http://blahblah/", - redirectStrategy.createLocationURI("http://BlahBlah").toASCIIString()); - } - @Test(expected=ProtocolException.class) public void testCreateLocationURIInvalid() throws Exception { final DefaultRedirectStrategy redirectStrategy = new DefaultRedirectStrategy(); From c7985395635cbc7bd630abb4204534536cbdf8ce Mon Sep 17 00:00:00 2001 From: Oleg Kalnichevski Date: Sat, 16 Feb 2019 13:27:25 +0100 Subject: [PATCH 7/8] HTTPCLIENT-1968: Fixed broken API compatibility with 4.4 --- .../apache/http/client/utils/URIUtils.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java b/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java index 1b303fad4..e9580e837 100644 --- a/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java +++ b/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java @@ -287,6 +287,21 @@ public class URIUtils { * * @since 4.4 */ + public static URI rewriteURIForRoute(final URI uri, final RouteInfo route) throws URISyntaxException { + return rewriteURIForRoute(uri, route, true); + } + + /** + * A convenience method that optionally converts the original {@link java.net.URI} either + * to a relative or an absolute form as required by the specified route. + * + * @param uri + * original URI. + * @throws URISyntaxException + * If the resulting URI is invalid. + * + * @since 4.5.8 + */ public static URI rewriteURIForRoute(final URI uri, final RouteInfo route, final boolean normalizeUri) throws URISyntaxException { if (uri == null) { return null; @@ -294,8 +309,8 @@ public class URIUtils { if (route.getProxyHost() != null && !route.isTunnelled()) { // Make sure the request URI is absolute return uri.isAbsolute() - ? rewriteURI(uri) - : rewriteURI(uri, route.getTargetHost(), normalizeUri ? DROP_FRAGMENT_AND_NORMALIZE : DROP_FRAGMENT); + ? rewriteURI(uri) + : rewriteURI(uri, route.getTargetHost(), normalizeUri ? DROP_FRAGMENT_AND_NORMALIZE : DROP_FRAGMENT); } // Make sure the request URI is relative return uri.isAbsolute() ? rewriteURI(uri, null, normalizeUri ? DROP_FRAGMENT_AND_NORMALIZE : DROP_FRAGMENT) : rewriteURI(uri); From 0fe3106dff8fa94c7ebe6697d8f2cf35b94bd541 Mon Sep 17 00:00:00 2001 From: Oleg Kalnichevski Date: Sat, 16 Feb 2019 15:29:47 +0100 Subject: [PATCH 8/8] HTTPCLIENT-1968: Fixed incorrect @since annotations --- .../org/apache/http/client/utils/URIUtils.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java b/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java index e9580e837..6a6a742a2 100644 --- a/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java +++ b/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java @@ -51,7 +51,7 @@ public class URIUtils { /** * Flags that control how URI is being rewritten. * - * @since 5.7.8 + * @since 4.5.8 */ public enum UriFlag { DROP_FRAGMENT, @@ -61,28 +61,28 @@ public class URIUtils { /** * Empty set of uri flags. * - * @since 5.7.8 + * @since 4.5.8 */ public static final EnumSet NO_FLAGS = EnumSet.noneOf(UriFlag.class); /** * Set of uri flags containing {@link UriFlag#DROP_FRAGMENT}. * - * @since 5.7.8 + * @since 4.5.8 */ public static final EnumSet DROP_FRAGMENT = EnumSet.of(UriFlag.DROP_FRAGMENT); /** * Set of uri flags containing {@link UriFlag#NORMALIZE}. * - * @since 5.7.8 + * @since 4.5.8 */ public static final EnumSet NORMALIZE = EnumSet.of(UriFlag.NORMALIZE); /** * Set of uri flags containing {@link UriFlag#DROP_FRAGMENT} and {@link UriFlag#NORMALIZE}. * - * @since 5.7.8 + * @since 4.5.8 */ public static final EnumSet DROP_FRAGMENT_AND_NORMALIZE = EnumSet.of(UriFlag.DROP_FRAGMENT, UriFlag.NORMALIZE); @@ -166,7 +166,7 @@ public class URIUtils { * * @throws URISyntaxException * If the resulting URI is invalid. - * @deprecated (5.7.8) Use {@link #rewriteURI(URI, HttpHost, EnumSet)} + * @deprecated (4.5.8) Use {@link #rewriteURI(URI, HttpHost, EnumSet)} */ @Deprecated public static URI rewriteURI( @@ -192,7 +192,7 @@ public class URIUtils { * * @throws URISyntaxException * If the resulting URI is invalid. - * @since 5.7.8 + * @since 4.5.8 */ public static URI rewriteURI( final URI uri,