From 71ef6682589e7c0ae5b59dacf4120f104ea63973 Mon Sep 17 00:00:00 2001 From: Oleg Kalnichevski Date: Tue, 13 Aug 2013 19:38:48 +0000 Subject: [PATCH] HTTPCLIENT-1385: Fixed path normalization in CacheKeyGenerator Contributed by James Leigh git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@1513622 13f79535-47bb-0310-9956-ffa450edef68 --- RELEASE_NOTES.txt | 3 ++ .../impl/client/cache/CacheKeyGenerator.java | 29 ++++------- .../client/cache/TestCacheKeyGenerator.java | 48 +++++++++++++++++++ .../apache/http/client/utils/URIUtils.java | 37 ++++++++++---- .../http/client/utils/TestURIUtils.java | 12 ++++- 5 files changed, 101 insertions(+), 28 deletions(-) diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index 92459653c..da992ecbf 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -2,6 +2,9 @@ Changes since release 4.3 BETA2 ------------------- +* [HTTPCLIENT-1385] Fixed path normalization in CacheKeyGenerator + Contributed by James Leigh + * [HTTPCLIENT-1370] Response to non-GET requests should never be cached with the default ResponseCachingPolicy Contributed by James Leigh diff --git a/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java b/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java index 554cb9aee..68a7bcc15 100644 --- a/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java +++ b/httpclient-cache/src/main/java/org/apache/http/impl/client/cache/CacheKeyGenerator.java @@ -29,9 +29,7 @@ import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URI; -import java.net.URISyntaxException; import java.net.URL; -import java.net.URLDecoder; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Collections; @@ -45,6 +43,7 @@ import org.apache.http.annotation.Immutable; import org.apache.http.client.cache.HeaderConstants; import org.apache.http.client.cache.HttpCacheEntry; +import org.apache.http.client.utils.URIUtils; /** * @since 4.1 @@ -52,6 +51,8 @@ @Immutable class CacheKeyGenerator { + private static final URI BASE_URI = URI.create("http://example.com/"); + /** * For a given {@link HttpHost} and {@link HttpRequest} get a URI from the * pair that I can use as an identifier KEY into my HttpCache @@ -69,33 +70,23 @@ public String getURI(final HttpHost host, final HttpRequest req) { public String canonicalizeUri(final String uri) { try { - final URL u = new URL(uri); - final String protocol = u.getProtocol().toLowerCase(); - final String hostname = u.getHost().toLowerCase(); + final URI normalized = URIUtils.resolve(BASE_URI, uri); + final URL u = new URL(normalized.toASCIIString()); + final String protocol = u.getProtocol(); + final String hostname = u.getHost(); final int port = canonicalizePort(u.getPort(), protocol); - String path = canonicalizePath(u.getPath()); - if ("".equals(path)) { - path = "/"; - } + final String path = u.getPath(); final String query = u.getQuery(); final String file = (query != null) ? (path + "?" + query) : path; final URL out = new URL(protocol, hostname, port, file); return out.toString(); + } catch (final IllegalArgumentException e) { + return uri; } catch (final MalformedURLException e) { return uri; } } - private String canonicalizePath(final String path) { - try { - final String decoded = URLDecoder.decode(path, "UTF-8"); - return (new URI(decoded)).getPath(); - } catch (final UnsupportedEncodingException e) { - } catch (final URISyntaxException e) { - } - return path; - } - private int canonicalizePort(final int port, final String protocol) { if (port == -1 && "http".equalsIgnoreCase(protocol)) { return 80; diff --git a/httpclient-cache/src/test/java/org/apache/http/impl/client/cache/TestCacheKeyGenerator.java b/httpclient-cache/src/test/java/org/apache/http/impl/client/cache/TestCacheKeyGenerator.java index 37cbe3381..b566ee6cb 100644 --- a/httpclient-cache/src/test/java/org/apache/http/impl/client/cache/TestCacheKeyGenerator.java +++ b/httpclient-cache/src/test/java/org/apache/http/impl/client/cache/TestCacheKeyGenerator.java @@ -357,6 +357,46 @@ public void testEmptyAbsPathIsEquivalentToSlash() { Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2)); } + @Test + public void testExtraDotSegmentsAreIgnored() { + final HttpHost host = new HttpHost("foo.example.com"); + final HttpRequest req1 = new BasicHttpRequest("GET", "/", HttpVersion.HTTP_1_1); + final HttpRequest req2 = new HttpGet("http://foo.example.com/./"); + Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2)); + } + + @Test + public void testExtraDotDotSegmentsAreIgnored() { + final HttpHost host = new HttpHost("foo.example.com"); + final HttpRequest req1 = new BasicHttpRequest("GET", "/", HttpVersion.HTTP_1_1); + final HttpRequest req2 = new HttpGet("http://foo.example.com/.././../"); + Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2)); + } + + @Test + public void testIntermidateDotDotSegementsAreEquivalent() { + final HttpHost host = new HttpHost("foo.example.com"); + final HttpRequest req1 = new BasicHttpRequest("GET", "/home.html", HttpVersion.HTTP_1_1); + final HttpRequest req2 = new BasicHttpRequest("GET", "/%7Esmith/../home.html", HttpVersion.HTTP_1_1); + Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2)); + } + + @Test + public void testIntermidateEncodedDotDotSegementsAreEquivalent() { + final HttpHost host = new HttpHost("foo.example.com"); + final HttpRequest req1 = new BasicHttpRequest("GET", "/home.html", HttpVersion.HTTP_1_1); + final HttpRequest req2 = new BasicHttpRequest("GET", "/%7Esmith%2F../home.html", HttpVersion.HTTP_1_1); + Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2)); + } + + @Test + public void testIntermidateDotSegementsAreEquivalent() { + final HttpHost host = new HttpHost("foo.example.com"); + final HttpRequest req1 = new BasicHttpRequest("GET", "/~smith/home.html", HttpVersion.HTTP_1_1); + final HttpRequest req2 = new BasicHttpRequest("GET", "/%7Esmith/./home.html", HttpVersion.HTTP_1_1); + Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2)); + } + @Test public void testEquivalentPathEncodingsAreEquivalent() { final HttpHost host = new HttpHost("foo.example.com"); @@ -372,4 +412,12 @@ public void testEquivalentExtraPathEncodingsAreEquivalent() { final HttpRequest req2 = new BasicHttpRequest("GET", "/%7Esmith%2Fhome.html", HttpVersion.HTTP_1_1); Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2)); } + + @Test + public void testEquivalentExtraPathEncodingsWithPercentAreEquivalent() { + final HttpHost host = new HttpHost("foo.example.com"); + final HttpRequest req1 = new BasicHttpRequest("GET", "/~smith/home%20folder.html", HttpVersion.HTTP_1_1); + final HttpRequest req2 = new BasicHttpRequest("GET", "/%7Esmith%2Fhome%20folder.html", HttpVersion.HTTP_1_1); + Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2)); + } } diff --git a/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java b/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java index 2a852b812..6b1077ace 100644 --- a/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java +++ b/httpclient/src/main/java/org/apache/http/client/utils/URIUtils.java @@ -233,7 +233,7 @@ public static URI resolve(final URI baseURI, final URI reference){ resolved = URI.create(resolvedString.substring(0, resolvedString.indexOf('#'))); } - return removeDotSegments(resolved); + return normalizeSyntax(resolved); } /** @@ -252,17 +252,18 @@ private static URI resolveReferenceStartingWithQueryString( } /** - * Removes dot segments according to RFC 3986, section 5.2.4 + * Removes dot segments according to RFC 3986, section 5.2.4 and + * Syntax-Based Normalization according to RFC 3986, section 6.2.2. * * @param uri the original URI * @return the URI without dot segments */ - private static URI removeDotSegments(final URI uri) { - final String path = uri.getPath(); - if ((path == null) || (path.indexOf("/.") == -1)) { - // No dot segments to remove + private static URI normalizeSyntax(final URI uri) { + if (uri.isOpaque()) { return uri; } + Args.check(uri.isAbsolute(), "Base URI must be absolute"); + final String path = uri.getPath() == null ? "" : uri.getPath(); final String[] inputSegments = path.split("/"); final Stack outputSegments = new Stack(); for (final String inputSegment : inputSegments) { @@ -281,9 +282,29 @@ private static URI removeDotSegments(final URI uri) { for (final String outputSegment : outputSegments) { outputBuffer.append('/').append(outputSegment); } + if (path.lastIndexOf('/') == path.length() - 1) { + // path.endsWith("/") || path.equals("") + outputBuffer.append('/'); + } try { - return new URI(uri.getScheme(), uri.getAuthority(), - outputBuffer.toString(), uri.getQuery(), uri.getFragment()); + final String scheme = uri.getScheme().toLowerCase(); + final String auth = uri.getAuthority().toLowerCase(); + final URI ref = new URI(scheme, auth, outputBuffer.toString(), + null, null); + if (uri.getQuery() == null && uri.getFragment() == null) { + return ref; + } + final StringBuilder normalized = new StringBuilder( + ref.toASCIIString()); + if (uri.getQuery() != null) { + // query string passed through unchanged + normalized.append('?').append(uri.getRawQuery()); + } + if (uri.getFragment() != null) { + // fragment passed through unchanged + normalized.append('#').append(uri.getRawFragment()); + } + return URI.create(normalized.toString()); } catch (final URISyntaxException e) { throw new IllegalArgumentException(e); } diff --git a/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java b/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java index e2b728a4b..67919af22 100644 --- a/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java +++ b/httpclient/src/test/java/org/apache/http/client/utils/TestURIUtils.java @@ -100,6 +100,16 @@ public void testRewriteScheme() throws Exception { URI.create("http://thishost:80/stuff#crap"), target, true).toString()); } + @Test + public void testNormalization() { + Assert.assertEquals("example://a/b/c/%7Bfoo%7D", URIUtils.resolve(this.baseURI, "eXAMPLE://a/./b/../b/%63/%7bfoo%7d").toString()); + Assert.assertEquals("http://www.example.com/%3C", URIUtils.resolve(this.baseURI, "http://www.example.com/%3c").toString()); + Assert.assertEquals("http://www.example.com/", URIUtils.resolve(this.baseURI, "HTTP://www.EXAMPLE.com/").toString()); + Assert.assertEquals("http://www.example.com/a/", URIUtils.resolve(this.baseURI, "http://www.example.com/a%2f").toString()); + Assert.assertEquals("http://www.example.com/?q=%26", URIUtils.resolve(this.baseURI, "http://www.example.com/?q=%26").toString()); + Assert.assertEquals("http://www.example.com/%23?q=%26", URIUtils.resolve(this.baseURI, "http://www.example.com/%23?q=%26").toString()); + } + @Test public void testResolve() { Assert.assertEquals("g:h", URIUtils.resolve(this.baseURI, "g:h").toString()); @@ -107,7 +117,7 @@ public void testResolve() { Assert.assertEquals("http://a/b/c/g", URIUtils.resolve(this.baseURI, "./g").toString()); Assert.assertEquals("http://a/b/c/g/", URIUtils.resolve(this.baseURI, "g/").toString()); Assert.assertEquals("http://a/g", URIUtils.resolve(this.baseURI, "/g").toString()); - Assert.assertEquals("http://g", URIUtils.resolve(this.baseURI, "//g").toString()); + Assert.assertEquals("http://g/", URIUtils.resolve(this.baseURI, "//g").toString()); Assert.assertEquals("http://a/b/c/d;p?y", URIUtils.resolve(this.baseURI, "?y").toString()); Assert.assertEquals("http://a/b/c/d;p?y#f", URIUtils.resolve(this.baseURI, "?y#f") .toString());