HTTPCLIENT-1385: Fixed path normalization in CacheKeyGenerator

Contributed by James Leigh <james at 3roundstones dot com>

git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@1513622 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Oleg Kalnichevski 2013-08-13 19:38:48 +00:00
parent 5e2c01472d
commit 71ef668258
5 changed files with 101 additions and 28 deletions

View File

@ -2,6 +2,9 @@
Changes since release 4.3 BETA2
-------------------
* [HTTPCLIENT-1385] Fixed path normalization in CacheKeyGenerator
Contributed by James Leigh <james at 3roundstones dot com>
* [HTTPCLIENT-1370] Response to non-GET requests should never be cached with the default
ResponseCachingPolicy
Contributed by James Leigh <james at 3roundstones dot com>

View File

@ -29,9 +29,7 @@ package org.apache.http.impl.client.cache;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
@ -45,6 +43,7 @@ import org.apache.http.HttpRequest;
import org.apache.http.annotation.Immutable;
import org.apache.http.client.cache.HeaderConstants;
import org.apache.http.client.cache.HttpCacheEntry;
import org.apache.http.client.utils.URIUtils;
/**
* @since 4.1
@ -52,6 +51,8 @@ import org.apache.http.client.cache.HttpCacheEntry;
@Immutable
class CacheKeyGenerator {
private static final URI BASE_URI = URI.create("http://example.com/");
/**
* For a given {@link HttpHost} and {@link HttpRequest} get a URI from the
* pair that I can use as an identifier KEY into my HttpCache
@ -69,33 +70,23 @@ class CacheKeyGenerator {
public String canonicalizeUri(final String uri) {
try {
final URL u = new URL(uri);
final String protocol = u.getProtocol().toLowerCase();
final String hostname = u.getHost().toLowerCase();
final URI normalized = URIUtils.resolve(BASE_URI, uri);
final URL u = new URL(normalized.toASCIIString());
final String protocol = u.getProtocol();
final String hostname = u.getHost();
final int port = canonicalizePort(u.getPort(), protocol);
String path = canonicalizePath(u.getPath());
if ("".equals(path)) {
path = "/";
}
final String path = u.getPath();
final String query = u.getQuery();
final String file = (query != null) ? (path + "?" + query) : path;
final URL out = new URL(protocol, hostname, port, file);
return out.toString();
} catch (final IllegalArgumentException e) {
return uri;
} catch (final MalformedURLException e) {
return uri;
}
}
private String canonicalizePath(final String path) {
try {
final String decoded = URLDecoder.decode(path, "UTF-8");
return (new URI(decoded)).getPath();
} catch (final UnsupportedEncodingException e) {
} catch (final URISyntaxException e) {
}
return path;
}
private int canonicalizePort(final int port, final String protocol) {
if (port == -1 && "http".equalsIgnoreCase(protocol)) {
return 80;

View File

@ -357,6 +357,46 @@ public class TestCacheKeyGenerator {
Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2));
}
@Test
public void testExtraDotSegmentsAreIgnored() {
final HttpHost host = new HttpHost("foo.example.com");
final HttpRequest req1 = new BasicHttpRequest("GET", "/", HttpVersion.HTTP_1_1);
final HttpRequest req2 = new HttpGet("http://foo.example.com/./");
Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2));
}
@Test
public void testExtraDotDotSegmentsAreIgnored() {
final HttpHost host = new HttpHost("foo.example.com");
final HttpRequest req1 = new BasicHttpRequest("GET", "/", HttpVersion.HTTP_1_1);
final HttpRequest req2 = new HttpGet("http://foo.example.com/.././../");
Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2));
}
@Test
public void testIntermidateDotDotSegementsAreEquivalent() {
final HttpHost host = new HttpHost("foo.example.com");
final HttpRequest req1 = new BasicHttpRequest("GET", "/home.html", HttpVersion.HTTP_1_1);
final HttpRequest req2 = new BasicHttpRequest("GET", "/%7Esmith/../home.html", HttpVersion.HTTP_1_1);
Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2));
}
@Test
public void testIntermidateEncodedDotDotSegementsAreEquivalent() {
final HttpHost host = new HttpHost("foo.example.com");
final HttpRequest req1 = new BasicHttpRequest("GET", "/home.html", HttpVersion.HTTP_1_1);
final HttpRequest req2 = new BasicHttpRequest("GET", "/%7Esmith%2F../home.html", HttpVersion.HTTP_1_1);
Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2));
}
@Test
public void testIntermidateDotSegementsAreEquivalent() {
final HttpHost host = new HttpHost("foo.example.com");
final HttpRequest req1 = new BasicHttpRequest("GET", "/~smith/home.html", HttpVersion.HTTP_1_1);
final HttpRequest req2 = new BasicHttpRequest("GET", "/%7Esmith/./home.html", HttpVersion.HTTP_1_1);
Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2));
}
@Test
public void testEquivalentPathEncodingsAreEquivalent() {
final HttpHost host = new HttpHost("foo.example.com");
@ -372,4 +412,12 @@ public class TestCacheKeyGenerator {
final HttpRequest req2 = new BasicHttpRequest("GET", "/%7Esmith%2Fhome.html", HttpVersion.HTTP_1_1);
Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2));
}
@Test
public void testEquivalentExtraPathEncodingsWithPercentAreEquivalent() {
final HttpHost host = new HttpHost("foo.example.com");
final HttpRequest req1 = new BasicHttpRequest("GET", "/~smith/home%20folder.html", HttpVersion.HTTP_1_1);
final HttpRequest req2 = new BasicHttpRequest("GET", "/%7Esmith%2Fhome%20folder.html", HttpVersion.HTTP_1_1);
Assert.assertEquals(extractor.getURI(host, req1), extractor.getURI(host, req2));
}
}

View File

@ -233,7 +233,7 @@ public class URIUtils {
resolved = URI.create(resolvedString.substring(0,
resolvedString.indexOf('#')));
}
return removeDotSegments(resolved);
return normalizeSyntax(resolved);
}
/**
@ -252,17 +252,18 @@ public class URIUtils {
}
/**
* Removes dot segments according to RFC 3986, section 5.2.4
* Removes dot segments according to RFC 3986, section 5.2.4 and
* Syntax-Based Normalization according to RFC 3986, section 6.2.2.
*
* @param uri the original URI
* @return the URI without dot segments
*/
private static URI removeDotSegments(final URI uri) {
final String path = uri.getPath();
if ((path == null) || (path.indexOf("/.") == -1)) {
// No dot segments to remove
private static URI normalizeSyntax(final URI uri) {
if (uri.isOpaque()) {
return uri;
}
Args.check(uri.isAbsolute(), "Base URI must be absolute");
final String path = uri.getPath() == null ? "" : uri.getPath();
final String[] inputSegments = path.split("/");
final Stack<String> outputSegments = new Stack<String>();
for (final String inputSegment : inputSegments) {
@ -281,9 +282,29 @@ public class URIUtils {
for (final String outputSegment : outputSegments) {
outputBuffer.append('/').append(outputSegment);
}
if (path.lastIndexOf('/') == path.length() - 1) {
// path.endsWith("/") || path.equals("")
outputBuffer.append('/');
}
try {
return new URI(uri.getScheme(), uri.getAuthority(),
outputBuffer.toString(), uri.getQuery(), uri.getFragment());
final String scheme = uri.getScheme().toLowerCase();
final String auth = uri.getAuthority().toLowerCase();
final URI ref = new URI(scheme, auth, outputBuffer.toString(),
null, null);
if (uri.getQuery() == null && uri.getFragment() == null) {
return ref;
}
final StringBuilder normalized = new StringBuilder(
ref.toASCIIString());
if (uri.getQuery() != null) {
// query string passed through unchanged
normalized.append('?').append(uri.getRawQuery());
}
if (uri.getFragment() != null) {
// fragment passed through unchanged
normalized.append('#').append(uri.getRawFragment());
}
return URI.create(normalized.toString());
} catch (final URISyntaxException e) {
throw new IllegalArgumentException(e);
}

View File

@ -100,6 +100,16 @@ public class TestURIUtils {
URI.create("http://thishost:80/stuff#crap"), target, true).toString());
}
@Test
public void testNormalization() {
Assert.assertEquals("example://a/b/c/%7Bfoo%7D", URIUtils.resolve(this.baseURI, "eXAMPLE://a/./b/../b/%63/%7bfoo%7d").toString());
Assert.assertEquals("http://www.example.com/%3C", URIUtils.resolve(this.baseURI, "http://www.example.com/%3c").toString());
Assert.assertEquals("http://www.example.com/", URIUtils.resolve(this.baseURI, "HTTP://www.EXAMPLE.com/").toString());
Assert.assertEquals("http://www.example.com/a/", URIUtils.resolve(this.baseURI, "http://www.example.com/a%2f").toString());
Assert.assertEquals("http://www.example.com/?q=%26", URIUtils.resolve(this.baseURI, "http://www.example.com/?q=%26").toString());
Assert.assertEquals("http://www.example.com/%23?q=%26", URIUtils.resolve(this.baseURI, "http://www.example.com/%23?q=%26").toString());
}
@Test
public void testResolve() {
Assert.assertEquals("g:h", URIUtils.resolve(this.baseURI, "g:h").toString());
@ -107,7 +117,7 @@ public class TestURIUtils {
Assert.assertEquals("http://a/b/c/g", URIUtils.resolve(this.baseURI, "./g").toString());
Assert.assertEquals("http://a/b/c/g/", URIUtils.resolve(this.baseURI, "g/").toString());
Assert.assertEquals("http://a/g", URIUtils.resolve(this.baseURI, "/g").toString());
Assert.assertEquals("http://g", URIUtils.resolve(this.baseURI, "//g").toString());
Assert.assertEquals("http://g/", URIUtils.resolve(this.baseURI, "//g").toString());
Assert.assertEquals("http://a/b/c/d;p?y", URIUtils.resolve(this.baseURI, "?y").toString());
Assert.assertEquals("http://a/b/c/d;p?y#f", URIUtils.resolve(this.baseURI, "?y#f")
.toString());