[HTTPCLIENT-1366] org.apache.http.client.utils.URLEncodedUtils should parse the semicolon as a query parameter separator. Contributed by Gary Gregory <ggregory at apache.org>

git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@1490331 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Gary D. Gregory 2013-06-06 16:00:05 +00:00
parent cc604018b9
commit aa09b30167
3 changed files with 163 additions and 37 deletions

View File

@ -1,6 +1,9 @@
Changes since release 4.3 BETA2
-------------------
* [HTTPCLIENT-1366] org.apache.http.client.utils.URLEncodedUtils should parse the semicolon as a query parameter separator.
Contributed by Gary Gregory <ggregory at apache.org>
* [HTTPCLIENT-1365] NPE when ManagedHttpClientConnectionFactory.create(ConnectionConfig) is called with null.
Contributed by Gary Gregory <ggregory at apache.org>

View File

@ -60,48 +60,54 @@ import org.apache.http.util.EntityUtils;
@Immutable
public class URLEncodedUtils {
/**
* The default HTML form content type.
*/
public static final String CONTENT_TYPE = "application/x-www-form-urlencoded";
private static final String PARAMETER_SEPARATOR = "&";
private static final char QP_SEP_A = '&';
private static final char QP_SEP_S = ';';
private static final String NAME_VALUE_SEPARATOR = "=";
/**
* Returns a list of {@link NameValuePair NameValuePairs} as built from the
* URI's query portion. For example, a URI of
* http://example.org/path/to/file?a=1&b=2&c=3 would return a list of three
* NameValuePairs, one for a=1, one for b=2, and one for c=3.
* Returns a list of {@link NameValuePair NameValuePairs} as built from the URI's query portion. For example, a URI
* of http://example.org/path/to/file?a=1&b=2&c=3 would return a list of three NameValuePairs, one for a=1, one for
* b=2, and one for c=3. By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
* <p>
* This is typically useful while parsing an HTTP PUT.
*
* This API is currently only used for testing.
*
* @param uri
* URI to parse
* @param charset
* charset name to use while parsing the query
* Charset name to use while parsing the query
* @return a list of {@link NameValuePair} as built from the URI's query portion.
*/
public static List <NameValuePair> parse (final URI uri, final String charset) {
public static List <NameValuePair> parse(final URI uri, final String charset) {
final String query = uri.getRawQuery();
if (query != null && query.length() > 0) {
final List<NameValuePair> result = new ArrayList<NameValuePair>();
final Scanner scanner = new Scanner(query);
parse(result, scanner, charset);
parse(result, scanner, QP_SEP_PATTERN, charset);
return result;
} else {
return Collections.emptyList();
}
return Collections.emptyList();
}
/**
* Returns a list of {@link NameValuePair NameValuePairs} as parsed from an
* {@link HttpEntity}. The encoding is taken from the entity's
* Content-Encoding header.
* Returns a list of {@link NameValuePair NameValuePairs} as parsed from an {@link HttpEntity}. The character
* encoding is taken from the entity's Content-Type header.
* <p>
* This is typically used while parsing an HTTP POST.
*
* @param entity
* The entity to parse
* @return a list of {@link NameValuePair} as parsed from the entity's content.
* @throws IOException
* If there was an exception getting the entity's data.
*/
public static List <NameValuePair> parse (
public static List <NameValuePair> parse(
final HttpEntity entity) throws IOException {
final ContentType contentType = ContentType.get(entity);
if (contentType != null && contentType.getMimeType().equalsIgnoreCase(CONTENT_TYPE)) {
@ -111,7 +117,7 @@ public class URLEncodedUtils {
if (charset == null) {
charset = HTTP.DEF_CONTENT_CHARSET;
}
return parse(content, charset);
return parse(content, charset, QP_SEPS);
}
}
return Collections.emptyList();
@ -121,7 +127,7 @@ public class URLEncodedUtils {
* Returns true if the entity's Content-Type header is
* <code>application/x-www-form-urlencoded</code>.
*/
public static boolean isEncoded (final HttpEntity entity) {
public static boolean isEncoded(final HttpEntity entity) {
final Header h = entity.getContentType();
if (h != null) {
final HeaderElement[] elems = h.getElements();
@ -136,6 +142,26 @@ public class URLEncodedUtils {
}
}
/**
* Adds all parameters within the Scanner to the list of {@code parameters}, as encoded by
* {@code charset}. For example, a scanner containing the string {@code a=1&b=2&c=3} would add the
* {@link NameValuePair NameValuePairs} a=1, b=2, and c=3 to the list of parameters. By convention, {@code '&'} and
* {@code ';'} are accepted as parameter separators.
* <p>
* This overload delegates to the four-argument {@code parse} method, passing the default separator
* pattern {@code QP_SEP_PATTERN}.
*
* @param parameters
* List to add parameters to.
* @param scanner
* Input that contains the parameters to parse.
* @param charset
* Encoding to use when decoding the parameters.
*/
public static void parse(
final List <NameValuePair> parameters,
final Scanner scanner,
final String charset) {
// Use the default separator pattern built from QP_SEPS ('&' and ';').
parse(parameters, scanner, QP_SEP_PATTERN, charset);
}
/**
* Adds all parameters within the Scanner to the list of
* <code>parameters</code>, as encoded by <code>encoding</code>. For
@ -147,14 +173,17 @@ public class URLEncodedUtils {
* List to add parameters to.
* @param scanner
* Input that contains the parameters to parse.
* @param parameterSepartorPattern
* The Pattern string for parameter separators, by convention {@code "[&;]"}
* @param charset
* Encoding to use when decoding the parameters.
*/
public static void parse (
public static void parse(
final List <NameValuePair> parameters,
final Scanner scanner,
final String parameterSepartorPattern,
final String charset) {
scanner.useDelimiter(PARAMETER_SEPARATOR);
scanner.useDelimiter(parameterSepartorPattern);
while (scanner.hasNext()) {
String name = null;
String value = null;
@ -170,20 +199,47 @@ public class URLEncodedUtils {
}
}
private static final char[] DELIM = new char[] { '&' };
/**
* Query parameter separators.
*/
private static final char[] QP_SEPS = new char[] { QP_SEP_A, QP_SEP_S };
/**
* Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string
* using the given character encoding.
* Query parameter separator pattern.
*/
private static final String QP_SEP_PATTERN = "[" + new String(QP_SEPS) + "]";
/**
* Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using the given character
* encoding. By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
*
* @param s
* text to parse.
* @param charset
* Encoding to use when decoding the parameters.
* @return a list of {@link NameValuePair} as parsed from the given string.
*
* @since 4.2
*/
public static List<NameValuePair> parse (final String s, final Charset charset) {
public static List<NameValuePair> parse(final String s, final Charset charset) {
return parse(s, charset, QP_SEPS);
}
/**
* Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using the given character
* encoding.
*
* @param s
* text to parse.
* @param charset
* Encoding to use when decoding the parameters.
* @param parameterSeparator
* The characters used to separate parameters, by convention, {@code '&'} and {@code ';'}.
* @return a list of {@link NameValuePair} as parsed from the given string.
*
* @since 4.3
*/
public static List<NameValuePair> parse(final String s, final Charset charset, final char... parameterSeparator) {
if (s == null) {
return Collections.emptyList();
}
@ -193,7 +249,7 @@ public class URLEncodedUtils {
final ParserCursor cursor = new ParserCursor(0, buffer.length());
final List<NameValuePair> list = new ArrayList<NameValuePair>();
while (!cursor.atEnd()) {
final NameValuePair nvp = parser.parseNameValuePair(buffer, cursor, DELIM);
final NameValuePair nvp = parser.parseNameValuePair(buffer, cursor, parameterSeparator);
if (nvp.getName().length() > 0) {
list.add(new BasicNameValuePair(
decodeFormFields(nvp.getName(), charset),
@ -204,21 +260,40 @@ public class URLEncodedUtils {
}
/**
* Returns a String that is suitable for use as an <code>application/x-www-form-urlencoded</code>
* Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded}
* list of parameters in an HTTP PUT or HTTP POST.
*
* @param parameters The parameters to include.
* @param encoding The encoding to use.
* @param charset The encoding to use.
* @return An {@code application/x-www-form-urlencoded} string
*/
public static String format (
public static String format(
final List <? extends NameValuePair> parameters,
final String encoding) {
final String charset) {
return format(parameters, QP_SEP_A, charset);
}
/**
* Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded}
* list of parameters in an HTTP PUT or HTTP POST.
*
* @param parameters The parameters to include.
* @param parameterSeparator The parameter separator, by convention, {@code '&'} or {@code ';'}.
* @param charset The encoding to use.
* @return An {@code application/x-www-form-urlencoded} string
*
* @since 4.3
*/
public static String format(
final List <? extends NameValuePair> parameters,
final char parameterSeparator,
final String charset) {
final StringBuilder result = new StringBuilder();
for (final NameValuePair parameter : parameters) {
final String encodedName = encodeFormFields(parameter.getName(), encoding);
final String encodedValue = encodeFormFields(parameter.getValue(), encoding);
final String encodedName = encodeFormFields(parameter.getName(), charset);
final String encodedValue = encodeFormFields(parameter.getValue(), charset);
if (result.length() > 0) {
result.append(PARAMETER_SEPARATOR);
result.append(parameterSeparator);
}
result.append(encodedName);
if (encodedValue != null) {
@ -230,23 +305,42 @@ public class URLEncodedUtils {
}
/**
* Returns a String that is suitable for use as an <code>application/x-www-form-urlencoded</code>
* Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded}
* list of parameters in an HTTP PUT or HTTP POST.
*
* @param parameters The parameters to include.
* @param charset The encoding to use.
* @return An {@code application/x-www-form-urlencoded} string
*
* @since 4.2
*/
public static String format (
public static String format(
final Iterable<? extends NameValuePair> parameters,
final Charset charset) {
return format(parameters, QP_SEP_A, charset);
}
/**
* Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded}
* list of parameters in an HTTP PUT or HTTP POST.
*
* @param parameters The parameters to include.
* @param parameterSeparator The parameter separator, by convention, {@code '&'} or {@code ';'}.
* @param charset The encoding to use.
* @return An {@code application/x-www-form-urlencoded} string
*
* @since 4.3
*/
public static String format(
final Iterable<? extends NameValuePair> parameters,
final char parameterSeparator,
final Charset charset) {
final StringBuilder result = new StringBuilder();
for (final NameValuePair parameter : parameters) {
final String encodedName = encodeFormFields(parameter.getName(), charset);
final String encodedValue = encodeFormFields(parameter.getValue(), charset);
if (result.length() > 0) {
result.append(PARAMETER_SEPARATOR);
result.append(parameterSeparator);
}
result.append(encodedName);
if (encodedValue != null) {
@ -470,12 +564,11 @@ public class URLEncodedUtils {
* @param charset the charset to use
* @return encoded string
*/
private static String encodeFormFields (final String content, final String charset) {
private static String encodeFormFields(final String content, final String charset) {
if (content == null) {
return null;
}
return urlEncode(content, charset != null ? Charset.forName(charset) :
Consts.UTF_8, URLENCODER, true);
return urlEncode(content, charset != null ? Charset.forName(charset) : Consts.UTF_8, URLENCODER, true);
}
/**

View File

@ -223,7 +223,7 @@ public class TestURLEncodedUtils {
}
@Test
public void testParseUTF8String() throws Exception {
public void testParseUTF8Ampersand1String() throws Exception {
final String ru_hello = constructString(RUSSIAN_HELLO);
final String ch_hello = constructString(SWISS_GERMAN_HELLO);
final List <NameValuePair> parameters = new ArrayList<NameValuePair>();
@ -238,6 +238,34 @@ public class TestURLEncodedUtils {
assertNameValuePair(result.get(1), "swiss", ch_hello);
}
// Runs the shared format/parse round-trip check with '&' as the parameter separator.
@Test
public void testParseUTF8Ampersand2String() throws Exception {
testParseUTF8String('&');
}
// Runs the shared format/parse round-trip check with ';' as the parameter separator.
@Test
public void testParseUTF8SemicolonString() throws Exception {
testParseUTF8String(';');
}
/**
* Shared round-trip check: formats two non-ASCII parameters with the given separator, then verifies
* that parsing the formatted string yields the original name/value pairs.
*
* @param parameterSeparator
* The separator character passed to {@code URLEncodedUtils.format} and to the
* three-argument {@code URLEncodedUtils.parse}.
*/
private void testParseUTF8String(final char parameterSeparator) throws Exception {
final String ru_hello = constructString(RUSSIAN_HELLO);
final String ch_hello = constructString(SWISS_GERMAN_HELLO);
final List <NameValuePair> parameters = new ArrayList<NameValuePair>();
parameters.add(new BasicNameValuePair("russian", ru_hello));
parameters.add(new BasicNameValuePair("swiss", ch_hello));
// Format as UTF-8 using the separator under test.
final String s = URLEncodedUtils.format(parameters, parameterSeparator, Consts.UTF_8);
// Parse with the two-argument overload, which uses the default separators.
final List <NameValuePair> result1 = URLEncodedUtils.parse(s, Consts.UTF_8);
Assert.assertEquals(2, result1.size());
assertNameValuePair(result1.get(0), "russian", ru_hello);
assertNameValuePair(result1.get(1), "swiss", ch_hello);
// Parsing with only the separator under test must give the same result as the default parse.
final List <NameValuePair> result2 = URLEncodedUtils.parse(s, Consts.UTF_8, parameterSeparator);
Assert.assertEquals(result1, result2);
}
@Test
public void testParseEntityDefaultContentType() throws Exception {
final String ch_hello = constructString(SWISS_GERMAN_HELLO);
@ -311,6 +339,8 @@ public class TestURLEncodedUtils {
params.add(new BasicNameValuePair("Name7", "b,b"));
params.add(new BasicNameValuePair("Name7", "ccc"));
Assert.assertEquals("Name7=aaa&Name7=b%2Cb&Name7=ccc", URLEncodedUtils.format(params, Consts.ASCII));
Assert.assertEquals("Name7=aaa&Name7=b%2Cb&Name7=ccc", URLEncodedUtils.format(params, '&', Consts.ASCII));
Assert.assertEquals("Name7=aaa;Name7=b%2Cb;Name7=ccc", URLEncodedUtils.format(params, ';', Consts.ASCII));
params.clear();
params.add(new BasicNameValuePair("Name8", "xx, yy ,zz"));