Build on aferreira's improvements to handle empty parameters with no `=`, and improve the component decoding logic
This commit is contained in:
parent
2d81acbbf2
commit
8b8bad7572
|
@ -23,6 +23,7 @@
|
|||
<w>camelcase</w>
|
||||
<w>canonicalhost</w>
|
||||
<w>charfilter</w>
|
||||
<w>charsets</w>
|
||||
<w>checksum</w>
|
||||
<w>chunking</w>
|
||||
<w>closeable</w>
|
||||
|
|
|
@ -19,9 +19,9 @@
|
|||
|
||||
package org.elasticsearch.rest.support;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLDecoder;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
import org.elasticsearch.common.base.Charsets;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
|
@ -29,43 +29,171 @@ import java.util.Map;
|
|||
*/
|
||||
public class RestUtils {
|
||||
|
||||
public static void decodeQueryString(String queryString, int fromIndex, Map<String, String> params) {
|
||||
public static void decodeQueryString(String s, int fromIndex, Map<String, String> params) {
|
||||
if (fromIndex < 0) {
|
||||
return;
|
||||
}
|
||||
if (fromIndex >= queryString.length()) {
|
||||
if (fromIndex >= s.length()) {
|
||||
return;
|
||||
}
|
||||
int toIndex;
|
||||
while ((toIndex = queryString.indexOf('&', fromIndex)) >= 0) {
|
||||
int idx = queryString.indexOf('=', fromIndex);
|
||||
if (fromIndex < idx && idx < toIndex) {
|
||||
params.put(decodeComponent(queryString.substring(fromIndex, idx)), decodeComponent(queryString.substring(idx + 1, toIndex)));
|
||||
|
||||
String name = null;
|
||||
int pos = fromIndex; // Beginning of the unprocessed region
|
||||
int i; // End of the unprocessed region
|
||||
char c = 0; // Current character
|
||||
for (i = fromIndex; i < s.length(); i++) {
|
||||
c = s.charAt(i);
|
||||
if (c == '=' && name == null) {
|
||||
if (pos != i) {
|
||||
name = decodeComponent(s.substring(pos, i));
|
||||
}
|
||||
pos = i + 1;
|
||||
} else if (c == '&') {
|
||||
if (name == null && pos != i) {
|
||||
// We haven't seen an `=' so far but moved forward.
|
||||
// Must be a param of the form '&a&' so add it with
|
||||
// an empty value.
|
||||
addParam(params, decodeComponent(s.substring(pos, i)), "");
|
||||
} else if (name != null) {
|
||||
addParam(params, name, decodeComponent(s.substring(pos, i)));
|
||||
name = null;
|
||||
}
|
||||
pos = i + 1;
|
||||
}
|
||||
fromIndex = toIndex + 1;
|
||||
}
|
||||
int idx = queryString.indexOf('=', fromIndex);
|
||||
if (idx < 0) {
|
||||
return;
|
||||
|
||||
if (pos != i) { // Are there characters we haven't dealt with?
|
||||
if (name == null) { // Yes and we haven't seen any `='.
|
||||
addParam(params, decodeComponent(s.substring(pos, i)), "");
|
||||
} else { // Yes and this must be the last value.
|
||||
addParam(params, name, decodeComponent(s.substring(pos, i)));
|
||||
}
|
||||
} else if (name != null) { // Have we seen a name without value?
|
||||
addParam(params, name, "");
|
||||
}
|
||||
params.put(decodeComponent(queryString.substring(fromIndex, idx)), decodeComponent(queryString.substring(idx + 1)));
|
||||
}
|
||||
|
||||
public static String decodeComponent(String s) {
|
||||
private static void addParam(Map<String, String> params, String name, String value) {
|
||||
params.put(name, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a bit of an URL encoded by a browser.
|
||||
* <p>
|
||||
* This is equivalent to calling {@link #decodeComponent(String, Charset)}
|
||||
* with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
|
||||
*
|
||||
* @param s The string to decode (can be empty).
|
||||
* @return The decoded string, or {@code s} if there's nothing to decode.
|
||||
* If the string to decode is {@code null}, returns an empty string.
|
||||
* @throws IllegalArgumentException if the string contains a malformed
|
||||
* escape sequence.
|
||||
*/
|
||||
public static String decodeComponent(final String s) {
|
||||
return decodeComponent(s, Charsets.UTF_8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a bit of an URL encoded by a browser.
|
||||
* <p>
|
||||
* The string is expected to be encoded as per RFC 3986, Section 2.
|
||||
* This is the encoding used by JavaScript functions {@code encodeURI}
|
||||
* and {@code encodeURIComponent}, but not {@code escape}. For example
|
||||
* in this encoding, é (in Unicode {@code U+00E9} or in UTF-8
|
||||
* {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
|
||||
* <p>
|
||||
* This is essentially equivalent to calling
|
||||
* <code>{@link java.net.URLDecoder URLDecoder}.{@link
|
||||
* java.net.URLDecoder#decode(String, String)}</code>
|
||||
* except that it's over 2x faster and generates less garbage for the GC.
|
||||
* Actually this function doesn't allocate any memory if there's nothing
|
||||
* to decode, the argument itself is returned.
|
||||
*
|
||||
* @param s The string to decode (can be empty).
|
||||
* @param charset The charset to use to decode the string (should really
|
||||
* be {@link Charsets#UTF_8}.
|
||||
* @return The decoded string, or {@code s} if there's nothing to decode.
|
||||
* If the string to decode is {@code null}, returns an empty string.
|
||||
* @throws IllegalArgumentException if the string contains a malformed
|
||||
* escape sequence.
|
||||
*/
|
||||
@SuppressWarnings("fallthrough")
|
||||
public static String decodeComponent(final String s, final Charset charset) {
|
||||
if (s == null) {
|
||||
return "";
|
||||
}
|
||||
int numChars = s.length();
|
||||
for (int i = 0; i < numChars; i++) {
|
||||
// do an initial check if it requires decoding do it and return
|
||||
if (s.charAt(i) == '+' || s.charAt(i) == '%') {
|
||||
try {
|
||||
return URLDecoder.decode(s, "UTF8");
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
throw new UnsupportedCharsetException("UTF8");
|
||||
}
|
||||
final int size = s.length();
|
||||
boolean modified = false;
|
||||
for (int i = 0; i < size; i++) {
|
||||
final char c = s.charAt(i);
|
||||
switch (c) {
|
||||
case '%':
|
||||
i++; // We can skip at least one char, e.g. `%%'.
|
||||
// Fall through.
|
||||
case '+':
|
||||
modified = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return s;
|
||||
if (!modified) {
|
||||
return s;
|
||||
}
|
||||
final byte[] buf = new byte[size];
|
||||
int pos = 0; // position in `buf'.
|
||||
for (int i = 0; i < size; i++) {
|
||||
char c = s.charAt(i);
|
||||
switch (c) {
|
||||
case '+':
|
||||
buf[pos++] = ' '; // "+" -> " "
|
||||
break;
|
||||
case '%':
|
||||
if (i == size - 1) {
|
||||
throw new IllegalArgumentException("unterminated escape"
|
||||
+ " sequence at end of string: " + s);
|
||||
}
|
||||
c = s.charAt(++i);
|
||||
if (c == '%') {
|
||||
buf[pos++] = '%'; // "%%" -> "%"
|
||||
break;
|
||||
} else if (i == size - 1) {
|
||||
throw new IllegalArgumentException("partial escape"
|
||||
+ " sequence at end of string: " + s);
|
||||
}
|
||||
c = decodeHexNibble(c);
|
||||
final char c2 = decodeHexNibble(s.charAt(++i));
|
||||
if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
|
||||
throw new IllegalArgumentException(
|
||||
"invalid escape sequence `%" + s.charAt(i - 1)
|
||||
+ s.charAt(i) + "' at index " + (i - 2)
|
||||
+ " of: " + s);
|
||||
}
|
||||
c = (char) (c * 16 + c2);
|
||||
// Fall through.
|
||||
default:
|
||||
buf[pos++] = (byte) c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return new String(buf, 0, pos, charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper to decode half of a hexadecimal number from a string.
|
||||
*
|
||||
* @param c The ASCII character of the hexadecimal number to decode.
|
||||
* Must be in the range {@code [0-9a-fA-F]}.
|
||||
* @return The hexadecimal value represented in the ASCII character
|
||||
* given, or {@link Character#MAX_VALUE} if the character is invalid.
|
||||
*/
|
||||
private static char decodeHexNibble(final char c) {
|
||||
if ('0' <= c && c <= '9') {
|
||||
return (char) (c - '0');
|
||||
} else if ('a' <= c && c <= 'f') {
|
||||
return (char) (c - 'a' + 10);
|
||||
} else if ('A' <= c && c <= 'F') {
|
||||
return (char) (c - 'A' + 10);
|
||||
} else {
|
||||
return Character.MAX_VALUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -84,37 +84,40 @@ public class RestUtilsTests {
|
|||
params.clear();
|
||||
uri = "something?=";
|
||||
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
|
||||
assertThat(params.size(), equalTo(1));
|
||||
assertThat(params.get(""), equalTo(""));
|
||||
assertThat(params.size(), equalTo(0));
|
||||
|
||||
params.clear();
|
||||
uri = "something?&=";
|
||||
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
|
||||
assertThat(params.size(), equalTo(1));
|
||||
assertThat(params.get(""), equalTo(""));
|
||||
assertThat(params.size(), equalTo(0));
|
||||
|
||||
params.clear();
|
||||
uri = "something?a";
|
||||
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
|
||||
assertThat(params.size(), equalTo(0));
|
||||
assertThat(params.size(), equalTo(1));
|
||||
assertThat(params.get("a"), equalTo(""));
|
||||
|
||||
params.clear();
|
||||
uri = "something?p=v&a";
|
||||
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
|
||||
assertThat(params.size(), equalTo(1));
|
||||
assertThat(params.size(), equalTo(2));
|
||||
assertThat(params.get("a"), equalTo(""));
|
||||
assertThat(params.get("p"), equalTo("v"));
|
||||
|
||||
params.clear();
|
||||
uri = "something?p=v&a&p1=v1";
|
||||
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
|
||||
assertThat(params.size(), equalTo(2));
|
||||
assertThat(params.size(), equalTo(3));
|
||||
assertThat(params.get("a"), equalTo(""));
|
||||
assertThat(params.get("p"), equalTo("v"));
|
||||
assertThat(params.get("p1"), equalTo("v1"));
|
||||
|
||||
params.clear();
|
||||
uri = "something?p=v&a&b&p1=v1";
|
||||
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
|
||||
assertThat(params.size(), equalTo(2));
|
||||
assertThat(params.size(), equalTo(4));
|
||||
assertThat(params.get("a"), equalTo(""));
|
||||
assertThat(params.get("b"), equalTo(""));
|
||||
assertThat(params.get("p"), equalTo("v"));
|
||||
assertThat(params.get("p1"), equalTo("v1"));
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue