diff --git a/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java b/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java index 10c6a0766..421e78e1a 100644 --- a/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java +++ b/httpclient/src/main/java/org/apache/http/client/utils/URIBuilder.java @@ -165,7 +165,7 @@ public class URIBuilder { } private String encodeUserInfo(final String userInfo) { - return URLEncodedUtils.enc(userInfo, Consts.UTF_8); + return URLEncodedUtils.encUserInfo(userInfo, Consts.UTF_8); } private String encodePath(final String path) { @@ -177,7 +177,7 @@ public class URIBuilder { } private String encodeFragment(final String fragment) { - return URLEncodedUtils.enc(fragment, Consts.UTF_8); + return URLEncodedUtils.encFragment(fragment, Consts.UTF_8); } /** diff --git a/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java b/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java index 45825600a..800d0cce2 100644 --- a/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java +++ b/httpclient/src/main/java/org/apache/http/client/utils/URLEncodedUtils.java @@ -261,16 +261,34 @@ public class URLEncodedUtils { /** * Unreserved characters, i.e. alphanumeric, plus: {@code _ - ! . ~ ' ( ) *} *
- * This list is the same as the {@code unreserved} list in RFC 2396> + * This list is the same as the {@code unreserved} list in + * RFC 2396 */ private static final BitSet UNRESERVED = new BitSet(256); - /** Punctuation characters: , ; : $ & + = */ + /** + * Punctuation characters: , ; : $ & + = + *
+ * These are the additional characters allowed by userinfo. + */ private static final BitSet PUNCT = new BitSet(256); - /** Characters which are safe to use, i.e. {@link #UNRESERVED} plus {@link #PUNCT}uation */ - private static final BitSet SAFE = new BitSet(256); + /** Characters which are safe to use in userinfo, i.e. {@link #UNRESERVED} plus {@link #PUNCT}uation */ + private static final BitSet USERINFO = new BitSet(256); /** Characters which are safe to use in a path, i.e. {@link #UNRESERVED} plus {@link #PUNCT}uation plus / @ */ private static final BitSet PATHSAFE = new BitSet(256); + /** Characters which are safe to use in a fragment, i.e. {@link #RESERVED} plus {@link #UNRESERVED} */ + private static final BitSet FRAGMENT = new BitSet(256); + /** + * Reserved characters, i.e. {@code ;/?:@&=+$,[]} + *
+ * This list is the same as the {@code reserved} list in + * RFC 2396 + * as augmented by + * RFC 2732 + */ + private static final BitSet RESERVED = new BitSet(256); + + /** * Safe characters for x-www-form-urlencoded data, as per java.net.URLEncoder and browser behaviour, * i.e. alphanumeric plus {@code "-", "_", ".", "*"} @@ -290,7 +308,7 @@ public class URLEncodedUtils { for (int i = '0'; i <= '9'; i++) { UNRESERVED.set(i); } - UNRESERVED.set('_'); + UNRESERVED.set('_'); // these are the characters of the "mark" list UNRESERVED.set('-'); UNRESERVED.set('.'); UNRESERVED.set('*'); @@ -308,14 +326,37 @@ public class URLEncodedUtils { PUNCT.set('&'); PUNCT.set('+'); PUNCT.set('='); - // URL path safe - SAFE.or(UNRESERVED); - SAFE.or(PUNCT); + // Safe for userinfo + USERINFO.or(UNRESERVED); + USERINFO.or(PUNCT); + // URL path safe PATHSAFE.or(UNRESERVED); - PATHSAFE.or(PUNCT); - PATHSAFE.set('/'); + PATHSAFE.set('/'); // segment separator + PATHSAFE.set(';'); // param separator + PATHSAFE.set(':'); // rest as per list in 2396, i.e. : @ & = + $ , PATHSAFE.set('@'); + PATHSAFE.set('&'); + PATHSAFE.set('='); + PATHSAFE.set('+'); + PATHSAFE.set('$'); + PATHSAFE.set(','); + + RESERVED.set(';'); + RESERVED.set('/'); + RESERVED.set('?'); + RESERVED.set(':'); + RESERVED.set('@'); + RESERVED.set('&'); + RESERVED.set('='); + RESERVED.set('+'); + RESERVED.set('$'); + RESERVED.set(','); + RESERVED.set('['); // added by RFC 2732 + RESERVED.set(']'); // added by RFC 2732 + + FRAGMENT.or(RESERVED); + FRAGMENT.or(UNRESERVED); } private static final int RADIX = 16; @@ -462,16 +503,29 @@ public class URLEncodedUtils { } /** - * Encode a String using the {@link #SAFE} set of characters. *
- * Used by URIBuilder to encode userinfo and fragment segments. + * Used by URIBuilder to encode the userinfo segment. * * @param content the string to encode, does not convert space to '+' * @param charset the charset to use * @return the encoded string */ - static String enc(final String content, final Charset charset) { - return urlencode(content, charset, SAFE, false); + static String encUserInfo(final String content, final Charset charset) { + return urlencode(content, charset, USERINFO, false); + } + + /** + * Encode a String using the {@link #FRAGMENT} set of characters. + *
+ * Used by URIBuilder to encode the fragment segment. + * + * @param content the string to encode, does not convert space to '+' + * @param charset the charset to use + * @return the encoded string + */ + static String encFragment(final String content, final Charset charset) { + return urlencode(content, charset, FRAGMENT, false); } /** diff --git a/httpclient/src/test/java/org/apache/http/client/utils/TestURIBuilder.java b/httpclient/src/test/java/org/apache/http/client/utils/TestURIBuilder.java index fc2630156..dd5c1a474 100644 --- a/httpclient/src/test/java/org/apache/http/client/utils/TestURIBuilder.java +++ b/httpclient/src/test/java/org/apache/http/client/utils/TestURIBuilder.java @@ -193,4 +193,36 @@ public class TestURIBuilder { } + @Test + public void testAgainstURIEncoded() throws Exception { + // Check that the encoded URI generated by URI builder agrees with that generated by using URI directly + final String scheme="https"; + final String host="localhost"; + final String specials="/ abcd!$&*()_-+.,=:;'~<>/@[]|#^%\"{}\\`xyz"; // N.B. excludes £¬¦ + final String formdatasafe = "abcd-_.*zyz"; + URI uri = new URI(scheme, specials, host, 80, specials, + formdatasafe, // TODO replace with specials when supported + specials); + + URI bld = new URIBuilder() + .setScheme(scheme) + .setHost(host) + .setUserInfo(specials) + .setPath(specials) + .addParameter(formdatasafe, null) // TODO replace with specials when supported + .setFragment(specials) + .build(); + + Assert.assertEquals(uri.getHost(), bld.getHost()); + + Assert.assertEquals(uri.getRawUserInfo(), bld.getRawUserInfo()); + + Assert.assertEquals(uri.getRawPath(), bld.getRawPath()); + + Assert.assertEquals(uri.getRawQuery(), bld.getRawQuery()); + + Assert.assertEquals(uri.getRawFragment(), bld.getRawFragment()); + + } + }