More efficient URL encoded content parser implementation

git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@1647645 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Oleg Kalnichevski 2014-12-23 19:03:04 +00:00
parent c4f47f3489
commit 6921c08154
2 changed files with 97 additions and 45 deletions

View File

@ -28,6 +28,9 @@
package org.apache.http.client.utils;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
@ -45,12 +48,12 @@ import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.annotation.Immutable;
import org.apache.http.entity.ContentType;
import org.apache.http.message.BasicHeaderValueParser;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.message.ParserCursor;
import org.apache.http.message.TokenParser;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.Args;
import org.apache.http.util.CharArrayBuffer;
import org.apache.http.util.EntityUtils;
/**
* A collection of utilities for encoding URLs.
@ -87,17 +90,14 @@ public class URLEncodedUtils {
public static List <NameValuePair> parse(final URI uri, final String charset) {
final String query = uri.getRawQuery();
if (query != null && !query.isEmpty()) {
final List<NameValuePair> result = new ArrayList<NameValuePair>();
final Scanner scanner = new Scanner(query);
parse(result, scanner, QP_SEP_PATTERN, charset);
return result;
return parse(query, Charset.forName(charset));
}
return Collections.emptyList();
}
/**
* Returns a list of {@link NameValuePair NameValuePairs} as parsed from an {@link HttpEntity}. The encoding is
* taken from the entity's Content-Encoding header.
* Returns a list of {@link NameValuePair NameValuePairs} as parsed from an {@link HttpEntity}.
* The encoding is taken from the entity's Content-Encoding header.
* <p>
* This is typically used while parsing an HTTP POST.
*
@ -110,17 +110,33 @@ public class URLEncodedUtils {
public static List <NameValuePair> parse(
final HttpEntity entity) throws IOException {
final ContentType contentType = ContentType.get(entity);
if (contentType != null && contentType.getMimeType().equalsIgnoreCase(CONTENT_TYPE)) {
final String content = EntityUtils.toString(entity, Consts.ASCII);
if (content != null && !content.isEmpty()) {
Charset charset = contentType.getCharset();
if (charset == null) {
charset = HTTP.DEF_CONTENT_CHARSET;
}
return parse(content, charset, QP_SEPS);
}
if (contentType == null || !contentType.getMimeType().equalsIgnoreCase(CONTENT_TYPE)) {
return Collections.emptyList();
}
return Collections.emptyList();
final long len = entity.getContentLength();
Args.check(len <= Integer.MAX_VALUE, "HTTP entity is too large");
final Charset charset = contentType.getCharset() != null ? contentType.getCharset() : HTTP.DEF_CONTENT_CHARSET;
final InputStream instream = entity.getContent();
if (instream == null) {
return Collections.emptyList();
}
final CharArrayBuffer buf;
try {
buf = new CharArrayBuffer(len > 0 ? (int) len : 1024);
final Reader reader = new InputStreamReader(instream, charset);
final char[] tmp = new char[1024];
int l;
while((l = reader.read(tmp)) != -1) {
buf.append(tmp, 0, l);
}
} finally {
instream.close();
}
if (buf.length() == 0) {
return Collections.emptyList();
}
return parse(buf, charset, QP_SEP_A);
}
/**
@ -151,12 +167,15 @@ public class URLEncodedUtils {
* Input that contains the parameters to parse.
* @param charset
* Encoding to use when decoding the parameters.
*
* @deprecated (4.4) use {@link #parse(String, java.nio.charset.Charset)}
*/
@Deprecated
public static void parse(
final List <NameValuePair> parameters,
final List<NameValuePair> parameters,
final Scanner scanner,
final String charset) {
parse(parameters, scanner, QP_SEP_PATTERN, charset);
parse(parameters, scanner, "[" + QP_SEP_A + QP_SEP_S + "]", charset);
}
/**
@ -174,7 +193,10 @@ public class URLEncodedUtils {
* The Pattern string for parameter separators, by convention {@code "[&;]"}
* @param charset
* Encoding to use when decoding the parameters.
*
* @deprecated (4.4) use {@link #parse(org.apache.http.util.CharArrayBuffer, java.nio.charset.Charset, char...)}
*/
@Deprecated
public static void parse(
final List <NameValuePair> parameters,
final Scanner scanner,
@ -182,8 +204,8 @@ public class URLEncodedUtils {
final String charset) {
scanner.useDelimiter(parameterSepartorPattern);
while (scanner.hasNext()) {
String name = null;
String value = null;
final String name;
final String value;
final String token = scanner.next();
final int i = token.indexOf(NAME_VALUE_SEPARATOR);
if (i != -1) {
@ -191,21 +213,12 @@ public class URLEncodedUtils {
value = decodeFormFields(token.substring(i + 1).trim(), charset);
} else {
name = decodeFormFields(token.trim(), charset);
value = null;
}
parameters.add(new BasicNameValuePair(name, value));
}
}
/**
* Query parameter separators.
*/
private static final char[] QP_SEPS = new char[] { QP_SEP_A, QP_SEP_S };
/**
* Query parameter separator pattern.
*/
private static final String QP_SEP_PATTERN = "[" + new String(QP_SEPS) + "]";
/**
* Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using the given character
* encoding. By convention, {@code '&'} and {@code ';'} are accepted as parameter separators.
@ -219,7 +232,9 @@ public class URLEncodedUtils {
* @since 4.2
*/
public static List<NameValuePair> parse(final String s, final Charset charset) {
return parse(s, charset, QP_SEPS);
final CharArrayBuffer buffer = new CharArrayBuffer(s.length());
buffer.append(s);
return parse(buffer, charset, QP_SEP_A, QP_SEP_S);
}
/**
@ -230,27 +245,64 @@ public class URLEncodedUtils {
* text to parse.
* @param charset
* Encoding to use when decoding the parameters.
* @param parameterSeparator
* The characters used to separate parameters, by convention, {@code '&'} and {@code ';'}.
* @param separators
* element separators.
* @return a list of {@link NameValuePair} as built from the URI's query portion.
*
* @since 4.3
*/
public static List<NameValuePair> parse(final String s, final Charset charset, final char... parameterSeparator) {
public static List<NameValuePair> parse(final String s, final Charset charset, final char... separators) {
if (s == null) {
return Collections.emptyList();
}
final BasicHeaderValueParser parser = BasicHeaderValueParser.INSTANCE;
final CharArrayBuffer buffer = new CharArrayBuffer(s.length());
buffer.append(s);
final ParserCursor cursor = new ParserCursor(0, buffer.length());
return parse(buffer, charset, separators);
}
/**
* Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string using
* the given character encoding.
*
* @param buf
* text to parse.
* @param charset
* Encoding to use when decoding the parameters.
* @param separators
* element separators.
* @return a list of {@link NameValuePair} as built from the URI's query portion.
*
* @since 4.4
*/
public static List<NameValuePair> parse(
final CharArrayBuffer buf, final Charset charset, final char... separators) {
Args.notNull(buf, "Char array buffer");
final TokenParser tokenParser = TokenParser.INSTANCE;
final BitSet delimSet = new BitSet();
for (char separator: separators) {
delimSet.set(separator);
}
final ParserCursor cursor = new ParserCursor(0, buf.length());
final List<NameValuePair> list = new ArrayList<NameValuePair>();
while (!cursor.atEnd()) {
final NameValuePair nvp = parser.parseNameValuePair(buffer, cursor, parameterSeparator);
if (!nvp.getName().isEmpty()) {
delimSet.set('=');
final String name = tokenParser.parseToken(buf, cursor, delimSet);
String value = null;
if (!cursor.atEnd()) {
final int delim = buf.charAt(cursor.getPos());
cursor.updatePos(cursor.getPos() + 1);
if (delim == '=') {
delimSet.clear('=');
value = tokenParser.parseValue(buf, cursor, delimSet);
if (!cursor.atEnd()) {
cursor.updatePos(cursor.getPos() + 1);
}
}
}
if (!name.isEmpty()) {
list.add(new BasicNameValuePair(
decodeFormFields(nvp.getName(), charset),
decodeFormFields(nvp.getValue(), charset)));
decodeFormFields(name, charset),
decodeFormFields(value, charset)));
}
}
return list;
@ -554,7 +606,7 @@ public class URLEncodedUtils {
* Encode/escape www-url-form-encoded content.
* <p>
* Uses the {@link #URLENCODER} set of characters, rather than
* the {@link #UNRSERVED} set; this is for compatibilty with previous
* the {@link #UNRESERVED} set; this is for compatibilty with previous
* releases, URLEncoder.encode() and most browsers.
*
* @param content the content to encode, will convert space to '+'
@ -572,7 +624,7 @@ public class URLEncodedUtils {
* Encode/escape www-url-form-encoded content.
* <p>
* Uses the {@link #URLENCODER} set of characters, rather than
* the {@link #UNRSERVED} set; this is for compatibilty with previous
* the {@link #UNRESERVED} set; this is for compatibilty with previous
* releases, URLEncoder.encode() and most browsers.
*
* @param content the content to encode, will convert space to '+'

View File

@ -192,7 +192,7 @@ public class TestURLEncodedUtils {
};
private static String constructString(final int [] unicodeChars) {
final StringBuffer buffer = new StringBuffer();
final StringBuilder buffer = new StringBuilder();
if (unicodeChars != null) {
for (final int unicodeChar : unicodeChars) {
buffer.append((char)unicodeChar);