SOLR-4283: Improvements for URL-Decoding

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1430396 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2013-01-08 17:22:14 +00:00
parent d65a7fc29a
commit 9ee4b4d5fa
4 changed files with 190 additions and 57 deletions

View File

@ -195,12 +195,14 @@ New Features
that can be set to false to not filter. Its useful when there is already a spatial
filter query but you also need to sort or boost by distance. (David Smiley)
* SOLR-4265: Solr now parses request parameters (in URL or sent with POST using
content-type application/x-www-form-urlencoded) in its dispatcher code. It no
* SOLR-4265, SOLR-4283: Solr now parses request parameters (in URL or sent with POST
using content-type application/x-www-form-urlencoded) in its dispatcher code. It no
longer relies on special configuration settings in Tomcat or other web containers
to enable UTF-8 encoding, which is mandatory for correct Solr behaviour. Also
the maximum length of x-www-form-urlencoded POST parameters can now be configured
through the requestDispatcher/requestParsers/@formdataUploadLimitInKB setting in
to enable UTF-8 encoding, which is mandatory for correct Solr behaviour. Query
strings passed in via the URL need to be properly-%-escaped, UTF-8 encoded
bytes, otherwise Solr refuses to handle the request. The maximum length of
x-www-form-urlencoded POST parameters can now be configured through the
requestDispatcher/requestParsers/@formdataUploadLimitInKB setting in
solrconfig.xml (defaults to 2 MiB). Solr now works out of the box with
e.g. Tomcat, JBoss,... (Uwe Schindler, Dawid Weiss, Alex Rocher)

View File

@ -20,9 +20,13 @@ package org.apache.solr.servlet;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@ -32,20 +36,20 @@ import java.util.Locale;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.BoundedInputStream;
import javax.servlet.http.HttpServletRequest;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.FastInputStream;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
@ -197,35 +201,138 @@ public class SolrRequestParsers
*/
public static MultiMapSolrParams parseQueryString(String queryString) {
Map<String,String[]> map = new HashMap<String, String[]>();
parseQueryString(queryString, "UTF-8", map);
parseQueryString(queryString, map);
return new MultiMapSolrParams(map);
}
/**
* Given a url-encoded query string, map it into the given map
* Given a url-encoded query string (UTF-8), map it into the given map
* @param queryString as given from URL
* @param charset to be used to decode %-encoding
* @param map place all parameters in this map
*/
static void parseQueryString(String queryString, String charset, Map<String,String[]> map) {
static void parseQueryString(final String queryString, final Map<String,String[]> map) {
if (queryString != null && queryString.length() > 0) {
try {
for( String kv : queryString.split( "&" ) ) {
int idx = kv.indexOf( '=' );
if( idx >= 0 ) {
String name = URLDecoder.decode( kv.substring( 0, idx ), charset);
String value = URLDecoder.decode( kv.substring( idx+1 ), charset);
MultiMapSolrParams.addParam( name, value, map );
final int len = queryString.length();
// this input stream emulates to get the raw bytes from the URL as passed to servlet container, it disallows any byte > 127 and enforces to %-escape them:
final InputStream in = new InputStream() {
int pos = 0;
@Override
public int read() {
if (pos < len) {
final char ch = queryString.charAt(pos);
if (ch > 127) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "URLDecoder: The query string contains a not-%-escaped byte > 127 at position " + pos);
}
pos++;
return ch;
} else {
String name = URLDecoder.decode( kv, charset );
MultiMapSolrParams.addParam( name, "", map );
return -1;
}
}
};
parseFormDataContent(in, Long.MAX_VALUE, IOUtils.CHARSET_UTF_8, map);
} catch (IOException ioe) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ioe);
}
}
}
catch( UnsupportedEncodingException uex ) {
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, uex );
/**
* Given a url-encoded form from POST content (as InputStream), map it into the given map.
* The given InputStream should be buffered!
* @param postContent to be parsed
* @param charset to be used to decode resulting bytes after %-decoding
* @param map place all parameters in this map
*/
@SuppressWarnings("fallthrough")
static long parseFormDataContent(final InputStream postContent, final long maxLen, final Charset charset, final Map<String,String[]> map) throws IOException {
final CharsetDecoder charsetDecoder = charset.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
long len = 0L, keyPos = 0L, valuePos = 0L;
final ByteArrayOutputStream2 keyStream = new ByteArrayOutputStream2(),
valueStream = new ByteArrayOutputStream2();
ByteArrayOutputStream2 currentStream = keyStream;
for(;;) {
int b = postContent.read();
switch (b) {
case -1: // end of stream
case '&': // separator
if (keyStream.size() > 0) {
final String key = decodeChars(keyStream, keyPos, charsetDecoder), value = decodeChars(valueStream, valuePos, charsetDecoder);
MultiMapSolrParams.addParam(key, value, map);
} else if (valueStream.size() > 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded invalid: missing key");
}
keyStream.reset();
valueStream.reset();
keyPos = valuePos = len + 1;
currentStream = keyStream;
break;
case '+': // space replacement
currentStream.write(' ');
break;
case '%': // escape
final int upper = digit16(b = postContent.read());
len++;
final int lower = digit16(b = postContent.read());
len++;
currentStream.write(((upper << 4) + lower));
break;
case '=': // kv separator
if (currentStream == keyStream) {
valuePos = len + 1;
currentStream = valueStream;
break;
}
// fall-through
default:
currentStream.write(b);
}
if (b == -1) {
break;
}
len++;
if (len > maxLen) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded content exceeds upload limit of " + (maxLen/1024L) + " KB");
}
}
return len;
}
private static String decodeChars(ByteArrayOutputStream2 stream, long position, CharsetDecoder charsetDecoder) {
try {
return charsetDecoder.decode(ByteBuffer.wrap(stream.buffer(), 0, stream.size())).toString();
} catch (CharacterCodingException cce) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"URLDecoder: Invalid character encoding detected after position " + position +
" of query string / form data (while parsing as " + charsetDecoder.charset().name() + ")"
);
}
}
/** Makes the buffer of ByteArrayOutputStream available without copy. */
static final class ByteArrayOutputStream2 extends ByteArrayOutputStream {
byte[] buffer() {
return buf;
}
}
private static int digit16(int b) {
if (b == -1) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "URLDecoder: Incomplete trailing escape (%) pattern");
}
if (b >= '0' && b <= '9') {
return b - '0';
}
if (b >= 'A' && b <= 'F') {
return b - ('A' - 10);
}
if (b >= 'a' && b <= 'f') {
return b - ('a' - 10);
}
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "URLDecoder: Invalid digit (" + ((char) b) + ") in escape (%) pattern");
}
public boolean isHandleSelect() {
@ -404,15 +511,12 @@ class FormDataRequestParser implements SolrRequestParser
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Not application/x-www-form-urlencoded content: "+req.getContentType() );
}
String charset = ContentStreamBase.getCharsetFromContentType(req.getContentType());
if (charset == null) charset = "UTF-8";
final Map<String,String[]> map = new HashMap<String, String[]>();
// also add possible URL parameters and include into the map (parsed using UTF-8):
final String qs = req.getQueryString();
if (qs != null) {
SolrRequestParsers.parseQueryString(qs, "UTF-8", map);
SolrRequestParsers.parseQueryString(qs, map);
}
// may be -1, so we check again later. But if its already greater we can stop processing!
@ -424,26 +528,21 @@ class FormDataRequestParser implements SolrRequestParser
}
// get query String from request body, using the charset given in content-type:
final InputStream in;
final String cs = ContentStreamBase.getCharsetFromContentType(req.getContentType());
final Charset charset = (cs == null) ? IOUtils.CHARSET_UTF_8 : Charset.forName(cs);
InputStream in = null;
try {
in = req.getInputStream();
} catch (IllegalStateException ise) {
throw (SolrException) getParameterIncompatibilityException().initCause(ise);
}
try {
final String data = IOUtils.toString(new BoundedInputStream(in, maxLength), charset);
// if there is remaining data in the underlying stream, throw exception:
if (in.read() != -1) {
// read remaining data and throw away:
while (IOUtils.skip(in, 1024L) > 0);
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded content exceeds upload limit of " + uploadLimitKB + " KB");
}
if (data.length() == 0 && totalLength > 0L) {
final long bytesRead = SolrRequestParsers.parseFormDataContent(FastInputStream.wrap(in), maxLength, charset, map);
if (bytesRead == 0L && totalLength > 0L) {
throw getParameterIncompatibilityException();
}
SolrRequestParsers.parseQueryString(data, charset, map);
} catch (IOException ioe) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ioe);
} catch (IllegalStateException ise) {
throw (SolrException) getParameterIncompatibilityException().initCause(ise);
} finally {
IOUtils.closeQuietly(in);
IOUtils.closeWhileHandlingException(in);
}
return new MultiMapSolrParams(map);

View File

@ -17,6 +17,8 @@
package org.apache.solr.servlet;
import java.net.URLEncoder;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.util.AbstractSolrTestCase;
import org.junit.BeforeClass;
@ -74,7 +76,7 @@ public class DirectSolrConnectionTest extends AbstractSolrTestCase
// Test using the Stream body parameter
for( String cmd : cmds ) {
direct.request( "/update?"+CommonParams.STREAM_BODY+"="+cmd, null );
direct.request( "/update?"+CommonParams.STREAM_BODY+"="+URLEncoder.encode(cmd, "UTF-8"), null );
}
String got = direct.request( getIt, null );
assertTrue( got.indexOf( value ) > 0 );

View File

@ -25,7 +25,6 @@ import java.io.ByteArrayInputStream;
import java.net.HttpURLConnection;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@ -115,7 +114,6 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 {
@Test
public void testStreamURL() throws Exception
{
boolean ok = false;
String url = "http://www.apache.org/dist/lucene/solr/";
byte[] bytes = null;
try {
@ -152,19 +150,51 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 {
}
@Test
public void testUrlParamParsing()
public void testUrlParamParsing() throws Exception
{
String[][] teststr = new String[][] {
final String[][] teststr = new String[][] {
{ "this is simple", "this%20is%20simple" },
{ "this is simple", "this+is+simple" },
{ "\u00FC", "%C3%BC" }, // lower-case "u" with diaeresis/umlaut
{ "\u0026", "%26" }, // &
{ "\u20AC", "%E2%82%AC" } // euro
{ "", "" }, // empty
{ "\u20AC", "%E2%82%ac" } // euro, also with lowercase escapes
};
for( String[] tst : teststr ) {
MultiMapSolrParams params = SolrRequestParsers.parseQueryString( "val="+tst[1] );
SolrParams params = SolrRequestParsers.parseQueryString( "val="+tst[1] );
assertEquals( tst[0], params.get( "val" ) );
params = SolrRequestParsers.parseQueryString( "val="+tst[1]+"&" );
assertEquals( tst[0], params.get( "val" ) );
params = SolrRequestParsers.parseQueryString( "&&val="+tst[1]+"&" );
assertEquals( tst[0], params.get( "val" ) );
params = SolrRequestParsers.parseQueryString( "&&val="+tst[1]+"&&&val="+tst[1]+"&" );
assertArrayEquals(new String[]{tst[0],tst[0]}, params.getParams("val") );
}
SolrParams params = SolrRequestParsers.parseQueryString("val");
assertEquals("", params.get("val"));
params = SolrRequestParsers.parseQueryString("val&foo=bar=bar&muh&");
assertEquals("", params.get("val"));
assertEquals("bar=bar", params.get("foo"));
assertEquals("", params.get("muh"));
final String[] invalid = {
"q=h%FCllo", // non-UTF-8
"q=h\u00FCllo", // encoded string is not pure US-ASCII
"q=hallo%", // incomplete escape
"q=hallo%1", // incomplete escape
"q=hallo%XX123", // invalid digit 'X' in escape
"=hallo" // missing key
};
for (String s : invalid) {
try {
SolrRequestParsers.parseQueryString(s);
fail("Should throw SolrException");
} catch (SolrException se) {
// pass
}
}
}
@ -172,7 +202,7 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 {
public void testStandardParseParamsAndFillStreams() throws Exception
{
final String getParams = "qt=%C3%BC&dup=foo", postParams = "q=hello&d%75p=bar";
final byte[] postBytes = postParams.getBytes("UTF-8");
final byte[] postBytes = postParams.getBytes("US-ASCII");
// Set up the expected behavior
final String[] ct = new String[] {
@ -224,7 +254,7 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 {
expect(request.getContentLength()).andReturn(-1).anyTimes();
expect(request.getQueryString()).andReturn(null).anyTimes();
expect(request.getInputStream()).andReturn(new ServletInputStream() {
private final ByteArrayInputStream in = new ByteArrayInputStream(large.toString().getBytes("UTF-8"));
private final ByteArrayInputStream in = new ByteArrayInputStream(large.toString().getBytes("US-ASCII"));
@Override public int read() { return in.read(); }
});
replay(request);