Build on aferreira's improvements to handle empty parameters with no `=`, and improve the component decoding logic

This commit is contained in:
kimchy 2011-01-07 14:34:34 +02:00
parent 2d81acbbf2
commit 8b8bad7572
3 changed files with 166 additions and 34 deletions

View File

@ -23,6 +23,7 @@
<w>camelcase</w>
<w>canonicalhost</w>
<w>charfilter</w>
<w>charsets</w>
<w>checksum</w>
<w>chunking</w>
<w>closeable</w>

View File

@ -19,9 +19,9 @@
package org.elasticsearch.rest.support;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.UnsupportedCharsetException;
import org.elasticsearch.common.base.Charsets;
import java.nio.charset.Charset;
import java.util.Map;
/**
@ -29,43 +29,171 @@ import java.util.Map;
*/
public class RestUtils {
// Parses the query string found in the input starting at `fromIndex` and stores every
// decoded parameter in `params`.
//
// NOTE(review): this span is a diff rendering without +/- markers. Lines that use
// `queryString`, `toIndex` or `idx` belong to the previous indexOf-based implementation;
// lines that use `s`, `pos`, `i` and `name` belong to the current character scanner.
// The comments below describe the current scanner.
//
// Current behaviour, as established by the scanner:
//   - a negative or out-of-range `fromIndex` leaves `params` untouched;
//   - "a=b" is stored as a -> b, "a=" and "a" are both stored as a -> "";
//   - empty segments ("&&", a lone "=") add nothing.
// NOTE(review): a segment of the form "=v" leaves `name` null (the name text is empty),
// so "v" is later stored as a key with an empty value — looks unintended, confirm.
public static void decodeQueryString(String queryString, int fromIndex, Map<String, String> params) {
public static void decodeQueryString(String s, int fromIndex, Map<String, String> params) {
// Nothing to parse when the start index is negative ...
if (fromIndex < 0) {
return;
}
// ... or points at/after the end of the input.
if (fromIndex >= queryString.length()) {
if (fromIndex >= s.length()) {
return;
}
int toIndex;
while ((toIndex = queryString.indexOf('&', fromIndex)) >= 0) {
int idx = queryString.indexOf('=', fromIndex);
if (fromIndex < idx && idx < toIndex) {
params.put(decodeComponent(queryString.substring(fromIndex, idx)), decodeComponent(queryString.substring(idx + 1, toIndex)));
// Decoded name of the pair being scanned; null until a non-empty name followed by '=' is seen.
String name = null;
int pos = fromIndex; // Beginning of the unprocessed region
int i; // End of the unprocessed region
// `c` is only read inside the loop body below.
char c = 0; // Current character
for (i = fromIndex; i < s.length(); i++) {
c = s.charAt(i);
// Only the first '=' of a pair ends the name; later '=' characters stay in the value.
if (c == '=' && name == null) {
// An empty name (pos == i) is not recorded, so `name` stays null.
if (pos != i) {
name = decodeComponent(s.substring(pos, i));
}
pos = i + 1;
} else if (c == '&') {
// '&' terminates the current pair.
if (name == null && pos != i) {
// We haven't seen an `=' so far but moved forward.
// Must be a param of the form '&a&' so add it with
// an empty value.
addParam(params, decodeComponent(s.substring(pos, i)), "");
} else if (name != null) {
// Regular "name=value" pair; the value may be empty.
addParam(params, name, decodeComponent(s.substring(pos, i)));
name = null;
}
// Skip the separator and start a new pair.
pos = i + 1;
}
fromIndex = toIndex + 1;
}
int idx = queryString.indexOf('=', fromIndex);
if (idx < 0) {
return;
// End of input: flush whatever is left of the last pair.
if (pos != i) { // Are there characters we haven't dealt with?
if (name == null) { // Yes and we haven't seen any `='.
addParam(params, decodeComponent(s.substring(pos, i)), "");
} else { // Yes and this must be the last value.
addParam(params, name, decodeComponent(s.substring(pos, i)));
}
} else if (name != null) { // Have we seen a name without value?
addParam(params, name, "");
}
params.put(decodeComponent(queryString.substring(fromIndex, idx)), decodeComponent(queryString.substring(idx + 1)));
}
public static String decodeComponent(String s) {
/**
 * Stores one decoded query-string parameter in {@code params}.
 * <p>
 * This is a plain {@link Map#put(Object, Object)}: when the same name is
 * added more than once, the last value replaces the earlier one.
 *
 * @param params The map receiving the parameter.
 * @param name   The decoded parameter name.
 * @param value  The decoded parameter value (the callers in this class pass
 *               an empty string for parameters without a value).
 */
private static void addParam(Map<String, String> params, String name, String value) {
params.put(name, value);
}
/**
 * Decodes a URL-encoded component using the UTF-8 charset.
 * <p>
 * This is equivalent to calling {@link #decodeComponent(String, Charset)}
 * with {@link Charsets#UTF_8}, the charset recommended by RFC 3986,
 * Section 2.
 *
 * @param s The string to decode (can be empty or {@code null}).
 * @return The decoded string, or {@code s} itself if there is nothing to
 * decode. If the string to decode is {@code null}, returns an empty string.
 * @throws IllegalArgumentException if the string contains a malformed
 * escape sequence.
 */
public static String decodeComponent(final String s) {
return decodeComponent(s, Charsets.UTF_8);
}
/**
* Decodes a bit of an URL encoded by a browser.
* <p>
* The string is expected to be encoded as per RFC 3986, Section 2.
* This is the encoding used by JavaScript functions {@code encodeURI}
* and {@code encodeURIComponent}, but not {@code escape}. For example
* in this encoding, &eacute; (in Unicode {@code U+00E9} or in UTF-8
* {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
* <p>
* This is essentially equivalent to calling
* <code>{@link java.net.URLDecoder URLDecoder}.{@link
* java.net.URLDecoder#decode(String, String)}</code>
* except that it's over 2x faster and generates less garbage for the GC.
* Actually this function doesn't allocate any memory if there's nothing
* to decode, the argument itself is returned.
*
* @param s The string to decode (can be empty).
* @param charset The charset to use to decode the string (should really
* be {@link Charsets#UTF_8}.
* @return The decoded string, or {@code s} if there's nothing to decode.
* If the string to decode is {@code null}, returns an empty string.
* @throws IllegalArgumentException if the string contains a malformed
* escape sequence.
*/
@SuppressWarnings("fallthrough")
public static String decodeComponent(final String s, final Charset charset) {
if (s == null) {
return "";
}
int numChars = s.length();
for (int i = 0; i < numChars; i++) {
// do an initial check if it requires decoding do it and return
if (s.charAt(i) == '+' || s.charAt(i) == '%') {
try {
return URLDecoder.decode(s, "UTF8");
} catch (UnsupportedEncodingException e) {
throw new UnsupportedCharsetException("UTF8");
}
final int size = s.length();
boolean modified = false;
for (int i = 0; i < size; i++) {
final char c = s.charAt(i);
switch (c) {
case '%':
i++; // We can skip at least one char, e.g. `%%'.
// Fall through.
case '+':
modified = true;
break;
}
}
return s;
if (!modified) {
return s;
}
final byte[] buf = new byte[size];
int pos = 0; // position in `buf'.
for (int i = 0; i < size; i++) {
char c = s.charAt(i);
switch (c) {
case '+':
buf[pos++] = ' '; // "+" -> " "
break;
case '%':
if (i == size - 1) {
throw new IllegalArgumentException("unterminated escape"
+ " sequence at end of string: " + s);
}
c = s.charAt(++i);
if (c == '%') {
buf[pos++] = '%'; // "%%" -> "%"
break;
} else if (i == size - 1) {
throw new IllegalArgumentException("partial escape"
+ " sequence at end of string: " + s);
}
c = decodeHexNibble(c);
final char c2 = decodeHexNibble(s.charAt(++i));
if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
throw new IllegalArgumentException(
"invalid escape sequence `%" + s.charAt(i - 1)
+ s.charAt(i) + "' at index " + (i - 2)
+ " of: " + s);
}
c = (char) (c * 16 + c2);
// Fall through.
default:
buf[pos++] = (byte) c;
break;
}
}
return new String(buf, 0, pos, charset);
}
/**
* Helper to decode half of a hexadecimal number from a string.
*
* @param c The ASCII character of the hexadecimal number to decode.
* Must be in the range {@code [0-9a-fA-F]}.
* @return The hexadecimal value represented in the ASCII character
* given, or {@link Character#MAX_VALUE} if the character is invalid.
*/
private static char decodeHexNibble(final char c) {
if ('0' <= c && c <= '9') {
return (char) (c - '0');
} else if ('a' <= c && c <= 'f') {
return (char) (c - 'a' + 10);
} else if ('A' <= c && c <= 'F') {
return (char) (c - 'A' + 10);
} else {
return Character.MAX_VALUE;
}
}
}

View File

@ -84,37 +84,40 @@ public class RestUtilsTests {
params.clear();
uri = "something?=";
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
assertThat(params.size(), equalTo(1));
assertThat(params.get(""), equalTo(""));
assertThat(params.size(), equalTo(0));
params.clear();
uri = "something?&=";
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
assertThat(params.size(), equalTo(1));
assertThat(params.get(""), equalTo(""));
assertThat(params.size(), equalTo(0));
params.clear();
uri = "something?a";
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
assertThat(params.size(), equalTo(0));
assertThat(params.size(), equalTo(1));
assertThat(params.get("a"), equalTo(""));
params.clear();
uri = "something?p=v&a";
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
assertThat(params.size(), equalTo(1));
assertThat(params.size(), equalTo(2));
assertThat(params.get("a"), equalTo(""));
assertThat(params.get("p"), equalTo("v"));
params.clear();
uri = "something?p=v&a&p1=v1";
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
assertThat(params.size(), equalTo(2));
assertThat(params.size(), equalTo(3));
assertThat(params.get("a"), equalTo(""));
assertThat(params.get("p"), equalTo("v"));
assertThat(params.get("p1"), equalTo("v1"));
params.clear();
uri = "something?p=v&a&b&p1=v1";
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
assertThat(params.size(), equalTo(2));
assertThat(params.size(), equalTo(4));
assertThat(params.get("a"), equalTo(""));
assertThat(params.get("b"), equalTo(""));
assertThat(params.get("p"), equalTo("v"));
assertThat(params.get("p1"), equalTo("v1"));
}