mirror of https://github.com/apache/lucene.git
SOLR-5082: The encoding of URL-encoded query parameters can be changed with the "ie" (input encoding) parameter, e.g. "select?q=m%FCller&ie=ISO-8859-1". The default is UTF-8. To change the encoding of POSTed content, use the "Content-Type" HTTP header.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1508236 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0036ab6eca
commit
82a5f35c20
|
@ -71,6 +71,11 @@ New Features
|
||||||
field is referenced via 'sfield' and the query point is constant.
|
field is referenced via 'sfield' and the query point is constant.
|
||||||
(David Smiley)
|
(David Smiley)
|
||||||
|
|
||||||
|
* SOLR-5082: The encoding of URL-encoded query parameters can be changed with
|
||||||
|
the "ie" (input encoding) parameter, e.g. "select?q=m%FCller&ie=ISO-8859-1".
|
||||||
|
The default is UTF-8. To change the encoding of POSTed content, use the
|
||||||
|
"Content-Type" HTTP header. (Uwe Schindler, David Smiley)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -28,9 +28,11 @@ import java.nio.charset.CharsetDecoder;
|
||||||
import java.nio.charset.CodingErrorAction;
|
import java.nio.charset.CodingErrorAction;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -68,6 +70,11 @@ public class SolrRequestParsers
|
||||||
public static final String SIMPLE = "simple";
|
public static final String SIMPLE = "simple";
|
||||||
public static final String STANDARD = "standard";
|
public static final String STANDARD = "standard";
|
||||||
|
|
||||||
|
private static final Charset CHARSET_US_ASCII = Charset.forName("US-ASCII");
|
||||||
|
|
||||||
|
public static final String INPUT_ENCODING_KEY = "ie";
|
||||||
|
private static final byte[] INPUT_ENCODING_BYTES = INPUT_ENCODING_KEY.getBytes(CHARSET_US_ASCII);
|
||||||
|
|
||||||
private final HashMap<String, SolrRequestParser> parsers =
|
private final HashMap<String, SolrRequestParser> parsers =
|
||||||
new HashMap<String, SolrRequestParser>();
|
new HashMap<String, SolrRequestParser>();
|
||||||
private final boolean enableRemoteStreams;
|
private final boolean enableRemoteStreams;
|
||||||
|
@ -242,7 +249,7 @@ public class SolrRequestParsers
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
parseFormDataContent(in, Long.MAX_VALUE, IOUtils.CHARSET_UTF_8, map);
|
parseFormDataContent(in, Long.MAX_VALUE, IOUtils.CHARSET_UTF_8, map, true);
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
throw new SolrException(ErrorCode.BAD_REQUEST, ioe);
|
throw new SolrException(ErrorCode.BAD_REQUEST, ioe);
|
||||||
}
|
}
|
||||||
|
@ -256,23 +263,53 @@ public class SolrRequestParsers
|
||||||
* @param charset to be used to decode resulting bytes after %-decoding
|
* @param charset to be used to decode resulting bytes after %-decoding
|
||||||
* @param map place all parameters in this map
|
* @param map place all parameters in this map
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("fallthrough")
|
@SuppressWarnings({"fallthrough", "resource"})
|
||||||
static long parseFormDataContent(final InputStream postContent, final long maxLen, final Charset charset, final Map<String,String[]> map) throws IOException {
|
static long parseFormDataContent(final InputStream postContent, final long maxLen, Charset charset, final Map<String,String[]> map, boolean supportCharsetParam) throws IOException {
|
||||||
final CharsetDecoder charsetDecoder = charset.newDecoder()
|
CharsetDecoder charsetDecoder = supportCharsetParam ? null : getCharsetDecoder(charset);
|
||||||
.onMalformedInput(CodingErrorAction.REPORT)
|
final LinkedList<Object> buffer = supportCharsetParam ? new LinkedList<Object>() : null;
|
||||||
.onUnmappableCharacter(CodingErrorAction.REPORT);
|
|
||||||
long len = 0L, keyPos = 0L, valuePos = 0L;
|
long len = 0L, keyPos = 0L, valuePos = 0L;
|
||||||
final ByteArrayOutputStream2 keyStream = new ByteArrayOutputStream2(),
|
final ByteArrayOutputStream keyStream = new ByteArrayOutputStream(),
|
||||||
valueStream = new ByteArrayOutputStream2();
|
valueStream = new ByteArrayOutputStream();
|
||||||
ByteArrayOutputStream2 currentStream = keyStream;
|
ByteArrayOutputStream currentStream = keyStream;
|
||||||
for(;;) {
|
for(;;) {
|
||||||
int b = postContent.read();
|
int b = postContent.read();
|
||||||
switch (b) {
|
switch (b) {
|
||||||
case -1: // end of stream
|
case -1: // end of stream
|
||||||
case '&': // separator
|
case '&': // separator
|
||||||
if (keyStream.size() > 0) {
|
if (keyStream.size() > 0) {
|
||||||
final String key = decodeChars(keyStream, keyPos, charsetDecoder), value = decodeChars(valueStream, valuePos, charsetDecoder);
|
final byte[] keyBytes = keyStream.toByteArray(), valueBytes = valueStream.toByteArray();
|
||||||
|
if (Arrays.equals(keyBytes, INPUT_ENCODING_BYTES)) {
|
||||||
|
// we found a charset declaration in the raw bytes
|
||||||
|
if (charsetDecoder != null) {
|
||||||
|
throw new SolrException(ErrorCode.BAD_REQUEST,
|
||||||
|
supportCharsetParam ? (
|
||||||
|
"Query string invalid: duplicate '"+
|
||||||
|
INPUT_ENCODING_KEY + "' (input encoding) key."
|
||||||
|
) : (
|
||||||
|
"Key '" + INPUT_ENCODING_KEY + "' (input encoding) cannot "+
|
||||||
|
"be used in POSTed application/x-www-form-urlencoded form data. "+
|
||||||
|
"To set the input encoding of POSTed form data, use the "+
|
||||||
|
"'Content-Type' header and provide a charset!"
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// decode the charset from raw bytes
|
||||||
|
charset = Charset.forName(decodeChars(valueBytes, keyPos, getCharsetDecoder(CHARSET_US_ASCII)));
|
||||||
|
charsetDecoder = getCharsetDecoder(charset);
|
||||||
|
// finally decode all buffered tokens
|
||||||
|
decodeBuffer(buffer, map, charsetDecoder);
|
||||||
|
} else if (charsetDecoder == null) {
|
||||||
|
// no charset decoder has been chosen yet, so buffer the raw keys / values for later decoding:
|
||||||
|
buffer.add(keyBytes);
|
||||||
|
buffer.add(Long.valueOf(keyPos));
|
||||||
|
buffer.add(valueBytes);
|
||||||
|
buffer.add(Long.valueOf(valuePos));
|
||||||
|
} else {
|
||||||
|
// we already have a charsetDecoder, so we can directly decode without buffering:
|
||||||
|
final String key = decodeChars(keyBytes, keyPos, charsetDecoder),
|
||||||
|
value = decodeChars(valueBytes, valuePos, charsetDecoder);
|
||||||
MultiMapSolrParams.addParam(key, value, map);
|
MultiMapSolrParams.addParam(key, value, map);
|
||||||
|
}
|
||||||
} else if (valueStream.size() > 0) {
|
} else if (valueStream.size() > 0) {
|
||||||
throw new SolrException(ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded invalid: missing key");
|
throw new SolrException(ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded invalid: missing key");
|
||||||
}
|
}
|
||||||
|
@ -309,12 +346,23 @@ public class SolrRequestParsers
|
||||||
throw new SolrException(ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded content exceeds upload limit of " + (maxLen/1024L) + " KB");
|
throw new SolrException(ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded content exceeds upload limit of " + (maxLen/1024L) + " KB");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// if we have not seen a charset declaration, decode the buffer now using the default one (UTF-8 or given via Content-Type):
|
||||||
|
if (buffer != null && !buffer.isEmpty()) {
|
||||||
|
assert charsetDecoder == null;
|
||||||
|
decodeBuffer(buffer, map, getCharsetDecoder(charset));
|
||||||
|
}
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String decodeChars(ByteArrayOutputStream2 stream, long position, CharsetDecoder charsetDecoder) {
|
private static CharsetDecoder getCharsetDecoder(Charset charset) {
|
||||||
|
return charset.newDecoder()
|
||||||
|
.onMalformedInput(CodingErrorAction.REPORT)
|
||||||
|
.onUnmappableCharacter(CodingErrorAction.REPORT);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String decodeChars(byte[] bytes, long position, CharsetDecoder charsetDecoder) {
|
||||||
try {
|
try {
|
||||||
return charsetDecoder.decode(ByteBuffer.wrap(stream.buffer(), 0, stream.size())).toString();
|
return charsetDecoder.decode(ByteBuffer.wrap(bytes)).toString();
|
||||||
} catch (CharacterCodingException cce) {
|
} catch (CharacterCodingException cce) {
|
||||||
throw new SolrException(ErrorCode.BAD_REQUEST,
|
throw new SolrException(ErrorCode.BAD_REQUEST,
|
||||||
"URLDecoder: Invalid character encoding detected after position " + position +
|
"URLDecoder: Invalid character encoding detected after position " + position +
|
||||||
|
@ -323,10 +371,18 @@ public class SolrRequestParsers
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Makes the buffer of ByteArrayOutputStream available without copy. */
|
private static void decodeBuffer(final LinkedList<Object> input, final Map<String,String[]> map, CharsetDecoder charsetDecoder) {
|
||||||
static final class ByteArrayOutputStream2 extends ByteArrayOutputStream {
|
for (final Iterator<Object> it = input.iterator(); it.hasNext(); ) {
|
||||||
byte[] buffer() {
|
final byte[] keyBytes = (byte[]) it.next();
|
||||||
return buf;
|
it.remove();
|
||||||
|
final Long keyPos = (Long) it.next();
|
||||||
|
it.remove();
|
||||||
|
final byte[] valueBytes = (byte[]) it.next();
|
||||||
|
it.remove();
|
||||||
|
final Long valuePos = (Long) it.next();
|
||||||
|
it.remove();
|
||||||
|
MultiMapSolrParams.addParam(decodeChars(keyBytes, keyPos.longValue(), charsetDecoder),
|
||||||
|
decodeChars(valueBytes, valuePos.longValue(), charsetDecoder), map);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -551,7 +607,7 @@ public class SolrRequestParsers
|
||||||
InputStream in = null;
|
InputStream in = null;
|
||||||
try {
|
try {
|
||||||
in = req.getInputStream();
|
in = req.getInputStream();
|
||||||
final long bytesRead = parseFormDataContent(FastInputStream.wrap(in), maxLength, charset, map);
|
final long bytesRead = parseFormDataContent(FastInputStream.wrap(in), maxLength, charset, map, false);
|
||||||
if (bytesRead == 0L && totalLength > 0L) {
|
if (bytesRead == 0L && totalLength > 0L) {
|
||||||
throw getParameterIncompatibilityException();
|
throw getParameterIncompatibilityException();
|
||||||
}
|
}
|
||||||
|
|
|
@ -242,6 +242,38 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testStandardParseParamsAndFillStreamsISO88591() throws Exception
|
||||||
|
{
|
||||||
|
final String getParams = "qt=%FC&dup=foo&ie=iso-8859-1&dup=%FC", postParams = "qt2=%FC&q=hello&d%75p=bar";
|
||||||
|
final byte[] postBytes = postParams.getBytes("US-ASCII");
|
||||||
|
final String contentType = "application/x-www-form-urlencoded; charset=iso-8859-1";
|
||||||
|
|
||||||
|
// Set up the expected behavior
|
||||||
|
HttpServletRequest request = createMock(HttpServletRequest.class);
|
||||||
|
expect(request.getMethod()).andReturn("POST").anyTimes();
|
||||||
|
expect(request.getContentType()).andReturn( contentType ).anyTimes();
|
||||||
|
expect(request.getQueryString()).andReturn(getParams).anyTimes();
|
||||||
|
expect(request.getContentLength()).andReturn(postBytes.length).anyTimes();
|
||||||
|
expect(request.getInputStream()).andReturn(new ServletInputStream() {
|
||||||
|
private final ByteArrayInputStream in = new ByteArrayInputStream(postBytes);
|
||||||
|
@Override public int read() { return in.read(); }
|
||||||
|
});
|
||||||
|
replay(request);
|
||||||
|
|
||||||
|
MultipartRequestParser multipart = new MultipartRequestParser( 2048 );
|
||||||
|
RawRequestParser raw = new RawRequestParser();
|
||||||
|
FormDataRequestParser formdata = new FormDataRequestParser( 2048 );
|
||||||
|
StandardRequestParser standard = new StandardRequestParser( multipart, raw, formdata );
|
||||||
|
|
||||||
|
SolrParams p = standard.parseParamsAndFillStreams(request, new ArrayList<ContentStream>());
|
||||||
|
|
||||||
|
assertEquals( "contentType: "+contentType, "hello", p.get("q") );
|
||||||
|
assertEquals( "contentType: "+contentType, "\u00FC", p.get("qt") );
|
||||||
|
assertEquals( "contentType: "+contentType, "\u00FC", p.get("qt2") );
|
||||||
|
assertArrayEquals( "contentType: "+contentType, new String[]{"foo","\u00FC","bar"}, p.getParams("dup") );
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testStandardFormdataUploadLimit() throws Exception
|
public void testStandardFormdataUploadLimit() throws Exception
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue