HTTPCLIENT-884: Charset omitted from UrlEncodedFormEntity Content-Type header

Contributed by Jared Jacobs <jmjacobs at cs.stanford.edu>

HTTPCLIENT-885: URLEncodedUtils fails to parse form-url-encoded entities that specify a charset


git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@832855 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Oleg Kalnichevski 2009-11-04 19:38:53 +00:00
parent 20db76c05a
commit 6df23379f7
4 changed files with 74 additions and 17 deletions

View File

@ -1,21 +1,25 @@
Changes since 4.0
-------------------
* [HTTPCLIENT-885] URLEncodedUtils now correctly parses form-url-encoded
entities that specify a charset.
Contributed by Oleg Kalnichevski <olegk at apache.org>
* [HTTPCLIENT-883] SO_TIMEOUT is not reset on persistent (re-used) connections.
Contributed by Oleg Kalnichevski <olegk at apache.org>
* [HTTPCLIENT-882] Auth state is not correctly updated if a successful NTLM
authentication results in a redirect. This is a minor bug as HttpClient manages
to recover from the problem automatically.
authentication results in a redirect. This is a minor bug as HttpClient
manages to recover from the problem automatically.
Contributed by Oleg Kalnichevski <olegk at apache.org>
* [HTTPCLIENT-881] Fixed race condition in AbstractClientConnAdapter that makes it
possible for an aborted connection to be returned to the pool.
* [HTTPCLIENT-881] Fixed race condition in AbstractClientConnAdapter that makes
it possible for an aborted connection to be returned to the pool.
Contributed by Tim Boemker <tboemker at elynx.com> and
Oleg Kalnichevski <olegk at apache.org>
* [HTTPCLIENT-832] Distinguish cookie format errors from violations of restrictions
imposed by a cookie specification. In the latter case
* [HTTPCLIENT-832] Distinguish cookie format errors from violations of
restrictions imposed by a cookie specification. In the latter case
CookieRestrictionViolationException will be thrown.
Contributed by Oleg Kalnichevski <olegk at apache.org>

View File

@ -56,9 +56,9 @@ public class UrlEncodedFormEntity extends StringEntity {
public UrlEncodedFormEntity (
final List <? extends NameValuePair> parameters,
final String encoding) throws UnsupportedEncodingException {
super(URLEncodedUtils.format(parameters, encoding),
encoding);
setContentType(URLEncodedUtils.CONTENT_TYPE);
super(URLEncodedUtils.format(parameters, encoding), encoding);
setContentType(URLEncodedUtils.CONTENT_TYPE + HTTP.CHARSET_PARAM +
(encoding != null ? encoding : HTTP.DEFAULT_CONTENT_CHARSET));
}
/**
@ -70,9 +70,7 @@ public class UrlEncodedFormEntity extends StringEntity {
*/
public UrlEncodedFormEntity (
final List <? extends NameValuePair> parameters) throws UnsupportedEncodingException {
super(URLEncodedUtils.format(parameters, HTTP.DEFAULT_CONTENT_CHARSET),
HTTP.DEFAULT_CONTENT_CHARSET);
setContentType(URLEncodedUtils.CONTENT_TYPE);
this(parameters, HTTP.DEFAULT_CONTENT_CHARSET);
}
}

View File

@ -97,13 +97,28 @@ public class URLEncodedUtils {
public static List <NameValuePair> parse (
final HttpEntity entity) throws IOException {
List <NameValuePair> result = Collections.emptyList();
if (isEncoded(entity)) {
final String content = EntityUtils.toString(entity);
final Header encoding = entity.getContentEncoding();
String contentType = null;
String charset = null;
Header h = entity.getContentType();
if (h != null) {
HeaderElement[] elems = h.getElements();
if (elems.length > 0) {
HeaderElement elem = elems[0];
contentType = elem.getName();
NameValuePair param = elem.getParameterByName("charset");
if (param != null) {
charset = param.getValue();
}
}
}
if (contentType != null && contentType.equalsIgnoreCase(CONTENT_TYPE)) {
final String content = EntityUtils.toString(entity, HTTP.ASCII);
if (content != null && content.length() > 0) {
result = new ArrayList <NameValuePair>();
parse(result, new Scanner(content),
encoding != null ? encoding.getValue() : null);
parse(result, new Scanner(content), charset);
}
}
return result;

View File

@ -38,6 +38,7 @@ import junit.framework.TestSuite;
import org.apache.http.NameValuePair;
import org.apache.http.entity.StringEntity;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
public class TestURLEncodedUtils extends TestCase {
@ -112,6 +113,45 @@ public class TestURLEncodedUtils extends TestCase {
assertTrue(URLEncodedUtils.parse(entity).isEmpty());
}
// Code points of a Swiss German greeting used as non-ASCII test data.
// Every value fits in Latin-1; 0xFC (u-umlaut) and 0xE4 (a-umlaut) are the
// non-ASCII characters, and 0x5F is an underscore standing in for a space.
static final int SWISS_GERMAN_HELLO [] = {
0x47, 0x72, 0xFC, 0x65, 0x7A, 0x69, 0x5F, 0x7A, 0xE4, 0x6D, 0xE4
};
// Code points of a Russian greeting used as non-ASCII test data.
// All values except the underscore separator (0x5F) are Cyrillic letters
// in the U+04xx range, so each one needs two bytes when encoded as UTF-8.
static final int RUSSIAN_HELLO [] = {
0x412, 0x441, 0x435, 0x43C, 0x5F, 0x43F, 0x440, 0x438,
0x432, 0x435, 0x442
};
/**
 * Builds a string from a sequence of Unicode code points.
 *
 * @param unicodeChars code points to append in order; may be {@code null}
 * @return the assembled string, or an empty string when
 *         {@code unicodeChars} is {@code null} or has no elements
 * @throws IllegalArgumentException if an element is not a valid Unicode
 *         code point
 */
private static String constructString(int [] unicodeChars) {
    // No synchronization is needed for a method-local buffer.
    StringBuilder buffer = new StringBuilder();
    if (unicodeChars != null) {
        for (int codePoint : unicodeChars) {
            // appendCodePoint emits a surrogate pair for values above U+FFFF,
            // whereas a plain (char) cast would silently truncate them.
            buffer.appendCodePoint(codePoint);
        }
    }
    return buffer.toString();
}
/**
 * Round-trips non-ASCII parameters through UTF-8 form encoding: formats the
 * pairs, checks the percent-encoded text, wraps it in an entity whose
 * Content-Type carries a UTF-8 charset parameter, and verifies that parsing
 * the entity yields the original names and values.
 */
public void testParseUTF8Entity () throws Exception {
    final String russianHello = constructString(RUSSIAN_HELLO);
    final String swissHello = constructString(SWISS_GERMAN_HELLO);

    final List <NameValuePair> pairs = new ArrayList<NameValuePair>();
    pairs.add(new BasicNameValuePair("russian", russianHello));
    pairs.add(new BasicNameValuePair("swiss", swissHello));

    // Encoding step: each non-ASCII character becomes its UTF-8 bytes, percent-escaped.
    final String encoded = URLEncodedUtils.format(pairs, HTTP.UTF_8);
    final String expected =
        "russian=%D0%92%D1%81%D0%B5%D0%BC_%D0%BF%D1%80%D0%B8%D0%B2%D0%B5%D1%82" +
        "&swiss=Gr%C3%BCezi_z%C3%A4m%C3%A4";
    assertEquals(expected, encoded);

    // Decoding step: the charset is declared only via the Content-Type parameter.
    final StringEntity entity = new StringEntity(encoded, HTTP.UTF_8);
    entity.setContentType(URLEncodedUtils.CONTENT_TYPE + HTTP.CHARSET_PARAM + HTTP.UTF_8);

    final List <NameValuePair> parsed = URLEncodedUtils.parse(entity);
    assertEquals(2, parsed.size());
    assertNameValuePair(parsed.get(0), "russian", russianHello);
    assertNameValuePair(parsed.get(1), "swiss", swissHello);
}
public void testIsEncoded () throws Exception {
final StringEntity entity = new StringEntity("...", null);