SOLR-4283: Improvements for URL-Decoding

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1430396 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2013-01-08 17:22:14 +00:00
parent d65a7fc29a
commit 9ee4b4d5fa
4 changed files with 190 additions and 57 deletions

View File

@ -195,12 +195,14 @@ New Features
that can be set to false to not filter. Its useful when there is already a spatial that can be set to false to not filter. Its useful when there is already a spatial
filter query but you also need to sort or boost by distance. (David Smiley) filter query but you also need to sort or boost by distance. (David Smiley)
* SOLR-4265: Solr now parses request parameters (in URL or sent with POST using * SOLR-4265, SOLR-4283: Solr now parses request parameters (in URL or sent with POST
content-type application/x-www-form-urlencoded) in its dispatcher code. It no using content-type application/x-www-form-urlencoded) in its dispatcher code. It no
longer relies on special configuration settings in Tomcat or other web containers longer relies on special configuration settings in Tomcat or other web containers
to enable UTF-8 encoding, which is mandatory for correct Solr behaviour. Also to enable UTF-8 encoding, which is mandatory for correct Solr behaviour. Query
the maximum length of x-www-form-urlencoded POST parameters can now be configured strings passed in via the URL need to be properly-%-escaped, UTF-8 encoded
through the requestDispatcher/requestParsers/@formdataUploadLimitInKB setting in bytes, otherwise Solr refuses to handle the request. The maximum length of
x-www-form-urlencoded POST parameters can now be configured through the
requestDispatcher/requestParsers/@formdataUploadLimitInKB setting in
solrconfig.xml (defaults to 2 MiB). Solr now works out of the box with solrconfig.xml (defaults to 2 MiB). Solr now works out of the box with
e.g. Tomcat, JBoss,... (Uwe Schindler, Dawid Weiss, Alex Rocher) e.g. Tomcat, JBoss,... (Uwe Schindler, Dawid Weiss, Alex Rocher)

View File

@ -20,9 +20,13 @@ package org.apache.solr.servlet;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.UnsupportedEncodingException; import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.net.URL; import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
@ -32,20 +36,20 @@ import java.util.Locale;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.BoundedInputStream;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import org.apache.commons.fileupload.FileItem; import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItemFactory; import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload; import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MultiMapSolrParams; import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream; import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase; import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.FastInputStream;
import org.apache.solr.core.Config; import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
@ -71,7 +75,7 @@ public class SolrRequestParsers
/** Default instance for e.g. admin requests. Limits to 2 MB uploads and does not allow remote streams. */ /** Default instance for e.g. admin requests. Limits to 2 MB uploads and does not allow remote streams. */
public static final SolrRequestParsers DEFAULT = new SolrRequestParsers(); public static final SolrRequestParsers DEFAULT = new SolrRequestParsers();
/** /**
* Pass in an xml configuration. A null configuration will enable * Pass in an xml configuration. A null configuration will enable
* everything with maximum values. * everything with maximum values.
@ -197,37 +201,140 @@ public class SolrRequestParsers
*/ */
public static MultiMapSolrParams parseQueryString(String queryString) { public static MultiMapSolrParams parseQueryString(String queryString) {
Map<String,String[]> map = new HashMap<String, String[]>(); Map<String,String[]> map = new HashMap<String, String[]>();
parseQueryString(queryString, "UTF-8", map); parseQueryString(queryString, map);
return new MultiMapSolrParams(map); return new MultiMapSolrParams(map);
} }
/** /**
* Given a url-encoded query string, map it into the given map * Given a url-encoded query string (UTF-8), map it into the given map
* @param queryString as given from URL * @param queryString as given from URL
* @param charset to be used to decode %-encoding
* @param map place all parameters in this map * @param map place all parameters in this map
*/ */
static void parseQueryString(String queryString, String charset, Map<String,String[]> map) { static void parseQueryString(final String queryString, final Map<String,String[]> map) {
if( queryString != null && queryString.length() > 0 ) { if (queryString != null && queryString.length() > 0) {
try { try {
for( String kv : queryString.split( "&" ) ) { final int len = queryString.length();
int idx = kv.indexOf( '=' ); // this input stream emulates to get the raw bytes from the URL as passed to servlet container, it disallows any byte > 127 and enforces to %-escape them:
if( idx >= 0 ) { final InputStream in = new InputStream() {
String name = URLDecoder.decode( kv.substring( 0, idx ), charset); int pos = 0;
String value = URLDecoder.decode( kv.substring( idx+1 ), charset); @Override
MultiMapSolrParams.addParam( name, value, map ); public int read() {
} else { if (pos < len) {
String name = URLDecoder.decode( kv, charset ); final char ch = queryString.charAt(pos);
MultiMapSolrParams.addParam( name, "", map ); if (ch > 127) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "URLDecoder: The query string contains a not-%-escaped byte > 127 at position " + pos);
}
pos++;
return ch;
} else {
return -1;
}
} }
} };
} parseFormDataContent(in, Long.MAX_VALUE, IOUtils.CHARSET_UTF_8, map);
catch( UnsupportedEncodingException uex ) { } catch (IOException ioe) {
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, uex ); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ioe);
} }
} }
} }
/**
* Given a url-encoded form from POST content (as InputStream), map it into the given map.
* The given InputStream should be buffered!
* @param postContent to be parsed
* @param charset to be used to decode resulting bytes after %-decoding
* @param map place all parameters in this map
*/
@SuppressWarnings("fallthrough")
static long parseFormDataContent(final InputStream postContent, final long maxLen, final Charset charset, final Map<String,String[]> map) throws IOException {
final CharsetDecoder charsetDecoder = charset.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
long len = 0L, keyPos = 0L, valuePos = 0L;
final ByteArrayOutputStream2 keyStream = new ByteArrayOutputStream2(),
valueStream = new ByteArrayOutputStream2();
ByteArrayOutputStream2 currentStream = keyStream;
for(;;) {
int b = postContent.read();
switch (b) {
case -1: // end of stream
case '&': // separator
if (keyStream.size() > 0) {
final String key = decodeChars(keyStream, keyPos, charsetDecoder), value = decodeChars(valueStream, valuePos, charsetDecoder);
MultiMapSolrParams.addParam(key, value, map);
} else if (valueStream.size() > 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded invalid: missing key");
}
keyStream.reset();
valueStream.reset();
keyPos = valuePos = len + 1;
currentStream = keyStream;
break;
case '+': // space replacement
currentStream.write(' ');
break;
case '%': // escape
final int upper = digit16(b = postContent.read());
len++;
final int lower = digit16(b = postContent.read());
len++;
currentStream.write(((upper << 4) + lower));
break;
case '=': // kv separator
if (currentStream == keyStream) {
valuePos = len + 1;
currentStream = valueStream;
break;
}
// fall-through
default:
currentStream.write(b);
}
if (b == -1) {
break;
}
len++;
if (len > maxLen) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded content exceeds upload limit of " + (maxLen/1024L) + " KB");
}
}
return len;
}
private static String decodeChars(ByteArrayOutputStream2 stream, long position, CharsetDecoder charsetDecoder) {
try {
return charsetDecoder.decode(ByteBuffer.wrap(stream.buffer(), 0, stream.size())).toString();
} catch (CharacterCodingException cce) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"URLDecoder: Invalid character encoding detected after position " + position +
" of query string / form data (while parsing as " + charsetDecoder.charset().name() + ")"
);
}
}
/** Makes the buffer of ByteArrayOutputStream available without copy. */
static final class ByteArrayOutputStream2 extends ByteArrayOutputStream {
byte[] buffer() {
return buf;
}
}
private static int digit16(int b) {
if (b == -1) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "URLDecoder: Incomplete trailing escape (%) pattern");
}
if (b >= '0' && b <= '9') {
return b - '0';
}
if (b >= 'A' && b <= 'F') {
return b - ('A' - 10);
}
if (b >= 'a' && b <= 'f') {
return b - ('a' - 10);
}
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "URLDecoder: Invalid digit (" + ((char) b) + ") in escape (%) pattern");
}
public boolean isHandleSelect() { public boolean isHandleSelect() {
return handleSelect; return handleSelect;
} }
@ -404,15 +511,12 @@ class FormDataRequestParser implements SolrRequestParser
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Not application/x-www-form-urlencoded content: "+req.getContentType() ); throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Not application/x-www-form-urlencoded content: "+req.getContentType() );
} }
String charset = ContentStreamBase.getCharsetFromContentType(req.getContentType());
if (charset == null) charset = "UTF-8";
final Map<String,String[]> map = new HashMap<String, String[]>(); final Map<String,String[]> map = new HashMap<String, String[]>();
// also add possible URL parameters and include into the map (parsed using UTF-8): // also add possible URL parameters and include into the map (parsed using UTF-8):
final String qs = req.getQueryString(); final String qs = req.getQueryString();
if (qs != null) { if (qs != null) {
SolrRequestParsers.parseQueryString(qs, "UTF-8", map); SolrRequestParsers.parseQueryString(qs, map);
} }
// may be -1, so we check again later. But if its already greater we can stop processing! // may be -1, so we check again later. But if its already greater we can stop processing!
@ -424,26 +528,21 @@ class FormDataRequestParser implements SolrRequestParser
} }
// get query String from request body, using the charset given in content-type: // get query String from request body, using the charset given in content-type:
final InputStream in; final String cs = ContentStreamBase.getCharsetFromContentType(req.getContentType());
final Charset charset = (cs == null) ? IOUtils.CHARSET_UTF_8 : Charset.forName(cs);
InputStream in = null;
try { try {
in = req.getInputStream(); in = req.getInputStream();
} catch (IllegalStateException ise) { final long bytesRead = SolrRequestParsers.parseFormDataContent(FastInputStream.wrap(in), maxLength, charset, map);
throw (SolrException) getParameterIncompatibilityException().initCause(ise); if (bytesRead == 0L && totalLength > 0L) {
}
try {
final String data = IOUtils.toString(new BoundedInputStream(in, maxLength), charset);
// if there is remaining data in the underlying stream, throw exception:
if (in.read() != -1) {
// read remaining data and throw away:
while (IOUtils.skip(in, 1024L) > 0);
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "application/x-www-form-urlencoded content exceeds upload limit of " + uploadLimitKB + " KB");
}
if (data.length() == 0 && totalLength > 0L) {
throw getParameterIncompatibilityException(); throw getParameterIncompatibilityException();
} }
SolrRequestParsers.parseQueryString(data, charset, map); } catch (IOException ioe) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, ioe);
} catch (IllegalStateException ise) {
throw (SolrException) getParameterIncompatibilityException().initCause(ise);
} finally { } finally {
IOUtils.closeQuietly(in); IOUtils.closeWhileHandlingException(in);
} }
return new MultiMapSolrParams(map); return new MultiMapSolrParams(map);

View File

@ -17,6 +17,8 @@
package org.apache.solr.servlet; package org.apache.solr.servlet;
import java.net.URLEncoder;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import org.apache.solr.util.AbstractSolrTestCase; import org.apache.solr.util.AbstractSolrTestCase;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@ -74,7 +76,7 @@ public class DirectSolrConnectionTest extends AbstractSolrTestCase
// Test using the Stream body parameter // Test using the Stream body parameter
for( String cmd : cmds ) { for( String cmd : cmds ) {
direct.request( "/update?"+CommonParams.STREAM_BODY+"="+cmd, null ); direct.request( "/update?"+CommonParams.STREAM_BODY+"="+URLEncoder.encode(cmd, "UTF-8"), null );
} }
String got = direct.request( getIt, null ); String got = direct.request( getIt, null );
assertTrue( got.indexOf( value ) > 0 ); assertTrue( got.indexOf( value ) > 0 );

View File

@ -25,7 +25,6 @@ import java.io.ByteArrayInputStream;
import java.net.HttpURLConnection; import java.net.HttpURLConnection;
import java.net.SocketTimeoutException; import java.net.SocketTimeoutException;
import java.net.URL; import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
@ -115,7 +114,6 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 {
@Test @Test
public void testStreamURL() throws Exception public void testStreamURL() throws Exception
{ {
boolean ok = false;
String url = "http://www.apache.org/dist/lucene/solr/"; String url = "http://www.apache.org/dist/lucene/solr/";
byte[] bytes = null; byte[] bytes = null;
try { try {
@ -152,19 +150,51 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 {
} }
@Test @Test
public void testUrlParamParsing() public void testUrlParamParsing() throws Exception
{ {
String[][] teststr = new String[][] { final String[][] teststr = new String[][] {
{ "this is simple", "this%20is%20simple" }, { "this is simple", "this%20is%20simple" },
{ "this is simple", "this+is+simple" }, { "this is simple", "this+is+simple" },
{ "\u00FC", "%C3%BC" }, // lower-case "u" with diaeresis/umlaut { "\u00FC", "%C3%BC" }, // lower-case "u" with diaeresis/umlaut
{ "\u0026", "%26" }, // & { "\u0026", "%26" }, // &
{ "\u20AC", "%E2%82%AC" } // euro { "", "" }, // empty
{ "\u20AC", "%E2%82%ac" } // euro, also with lowercase escapes
}; };
for( String[] tst : teststr ) { for( String[] tst : teststr ) {
MultiMapSolrParams params = SolrRequestParsers.parseQueryString( "val="+tst[1] ); SolrParams params = SolrRequestParsers.parseQueryString( "val="+tst[1] );
assertEquals( tst[0], params.get( "val" ) ); assertEquals( tst[0], params.get( "val" ) );
params = SolrRequestParsers.parseQueryString( "val="+tst[1]+"&" );
assertEquals( tst[0], params.get( "val" ) );
params = SolrRequestParsers.parseQueryString( "&&val="+tst[1]+"&" );
assertEquals( tst[0], params.get( "val" ) );
params = SolrRequestParsers.parseQueryString( "&&val="+tst[1]+"&&&val="+tst[1]+"&" );
assertArrayEquals(new String[]{tst[0],tst[0]}, params.getParams("val") );
}
SolrParams params = SolrRequestParsers.parseQueryString("val");
assertEquals("", params.get("val"));
params = SolrRequestParsers.parseQueryString("val&foo=bar=bar&muh&");
assertEquals("", params.get("val"));
assertEquals("bar=bar", params.get("foo"));
assertEquals("", params.get("muh"));
final String[] invalid = {
"q=h%FCllo", // non-UTF-8
"q=h\u00FCllo", // encoded string is not pure US-ASCII
"q=hallo%", // incomplete escape
"q=hallo%1", // incomplete escape
"q=hallo%XX123", // invalid digit 'X' in escape
"=hallo" // missing key
};
for (String s : invalid) {
try {
SolrRequestParsers.parseQueryString(s);
fail("Should throw SolrException");
} catch (SolrException se) {
// pass
}
} }
} }
@ -172,7 +202,7 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 {
public void testStandardParseParamsAndFillStreams() throws Exception public void testStandardParseParamsAndFillStreams() throws Exception
{ {
final String getParams = "qt=%C3%BC&dup=foo", postParams = "q=hello&d%75p=bar"; final String getParams = "qt=%C3%BC&dup=foo", postParams = "q=hello&d%75p=bar";
final byte[] postBytes = postParams.getBytes("UTF-8"); final byte[] postBytes = postParams.getBytes("US-ASCII");
// Set up the expected behavior // Set up the expected behavior
final String[] ct = new String[] { final String[] ct = new String[] {
@ -224,7 +254,7 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 {
expect(request.getContentLength()).andReturn(-1).anyTimes(); expect(request.getContentLength()).andReturn(-1).anyTimes();
expect(request.getQueryString()).andReturn(null).anyTimes(); expect(request.getQueryString()).andReturn(null).anyTimes();
expect(request.getInputStream()).andReturn(new ServletInputStream() { expect(request.getInputStream()).andReturn(new ServletInputStream() {
private final ByteArrayInputStream in = new ByteArrayInputStream(large.toString().getBytes("UTF-8")); private final ByteArrayInputStream in = new ByteArrayInputStream(large.toString().getBytes("US-ASCII"));
@Override public int read() { return in.read(); } @Override public int read() { return in.read(); }
}); });
replay(request); replay(request);