SOLR-443: declare charset for POST body

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@673528 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2008-07-02 22:23:24 +00:00
parent 363d51dd88
commit de0dc7ecf7
2 changed files with 70 additions and 31 deletions

View File

@ -451,6 +451,13 @@ Bug Fixes
38. SOLR-584: Make stats.jsp and stats.xsl more robust. 38. SOLR-584: Make stats.jsp and stats.xsl more robust.
(Yousef Ourabi and hossman) (Yousef Ourabi and hossman)
39. SOLR-443: SolrJ: Declare UTF-8 charset on POSTed parameters
to avoid problems with servlet containers that default to latin-1,
and allow switching of the exact POST mechanism for parameters
via useMultiPartPost in CommonsHttpSolrServer.
(Lars Kotthoff, Andrew Schurman, ryan, yonik)
Other Changes Other Changes
1. SOLR-135: Moved common classes to org.apache.solr.common and altered the 1. SOLR-135: Moved common classes to org.apache.solr.common and altered the
build scripts to make two jars: apache-solr-1.3.jar and build scripts to make two jars: apache-solr-1.3.jar and

View File

@ -24,6 +24,8 @@ import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.Collection; import java.util.Collection;
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import java.util.zip.InflaterInputStream; import java.util.zip.InflaterInputStream;
@ -34,6 +36,7 @@ import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.methods.multipart.MultipartRequestEntity; import org.apache.commons.httpclient.methods.multipart.MultipartRequestEntity;
import org.apache.commons.httpclient.methods.multipart.Part; import org.apache.commons.httpclient.methods.multipart.Part;
import org.apache.commons.httpclient.methods.multipart.PartBase; import org.apache.commons.httpclient.methods.multipart.PartBase;
import org.apache.commons.httpclient.methods.multipart.StringPart;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.ResponseParser; import org.apache.solr.client.solrj.ResponseParser;
import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrRequest;
@ -69,6 +72,21 @@ public class CommonsHttpSolrServer extends SolrServer
private boolean _allowCompression = false; private boolean _allowCompression = false;
private int _maxRetries = 0; private int _maxRetries = 0;
/**
* If set to false, add the query parameters as URL-encoded parameters to the
* POST request in a single part. If set to true, create a new part of a
* multi-part request for each parameter.
* <p>
* The reason for adding all parameters as parts of a multi-part request is
* that this allows us to specify the charset: the standards for single-part
* requests specify that non-ASCII characters should be URL-encoded, but don't
* specify the charset of the characters to be URL-encoded (cf.
* http://www.w3.org/TR/html401/interact/forms.html#form-content-type).
* Therefore you have to rely on your servlet container doing the right thing
* with single-part requests.
*/
private boolean useMultiPartPost;
/** /**
* @param solrServerUrl The URL of the Solr server. For * @param solrServerUrl The URL of the Solr server. For
* example, "<code>http://localhost:8983/solr/</code>" * example, "<code>http://localhost:8983/solr/</code>"
@ -85,11 +103,15 @@ public class CommonsHttpSolrServer extends SolrServer
* will use this SolrServer. * will use this SolrServer.
*/ */
public CommonsHttpSolrServer(String solrServerUrl, HttpClient httpClient) throws MalformedURLException { public CommonsHttpSolrServer(String solrServerUrl, HttpClient httpClient) throws MalformedURLException {
this(new URL(solrServerUrl), httpClient, new BinaryResponseParser()); this(new URL(solrServerUrl), httpClient, new BinaryResponseParser(), false);
}
public CommonsHttpSolrServer(String solrServerUrl, HttpClient httpClient, boolean useMultiPartPost) throws MalformedURLException {
this(new URL(solrServerUrl), httpClient, new BinaryResponseParser(), useMultiPartPost);
} }
public CommonsHttpSolrServer(String solrServerUrl, HttpClient httpClient, ResponseParser parser) throws MalformedURLException { public CommonsHttpSolrServer(String solrServerUrl, HttpClient httpClient, ResponseParser parser) throws MalformedURLException {
this(new URL(solrServerUrl), httpClient, parser); this(new URL(solrServerUrl), httpClient, parser, false);
} }
/** /**
@ -100,15 +122,19 @@ public class CommonsHttpSolrServer extends SolrServer
*/ */
public CommonsHttpSolrServer(URL baseURL) public CommonsHttpSolrServer(URL baseURL)
{ {
this(baseURL, null, new BinaryResponseParser()); this(baseURL, null, new BinaryResponseParser(), false);
} }
public CommonsHttpSolrServer(URL baseURL, HttpClient client){ public CommonsHttpSolrServer(URL baseURL, HttpClient client){
this(baseURL, client, new BinaryResponseParser()); this(baseURL, client, new BinaryResponseParser(), false);
}
public CommonsHttpSolrServer(URL baseURL, HttpClient client, boolean useMultiPartPost){
this(baseURL, client, new BinaryResponseParser(), useMultiPartPost);
} }
public CommonsHttpSolrServer(URL baseURL, HttpClient client, ResponseParser parser) { public CommonsHttpSolrServer(URL baseURL, HttpClient client, ResponseParser parser, boolean useMultiPartPost) {
_baseURL = baseURL.toExternalForm(); _baseURL = baseURL.toExternalForm();
if( _baseURL.endsWith( "/" ) ) { if( _baseURL.endsWith( "/" ) ) {
_baseURL = _baseURL.substring( 0, _baseURL.length()-1 ); _baseURL = _baseURL.substring( 0, _baseURL.length()-1 );
@ -129,6 +155,8 @@ public class CommonsHttpSolrServer extends SolrServer
// by default use the XML one // by default use the XML one
_parser = parser; _parser = parser;
this.useMultiPartPost = useMultiPartPost;
} }
@ -205,51 +233,55 @@ public class CommonsHttpSolrServer extends SolrServer
String url = _baseURL + path; String url = _baseURL + path;
boolean isMultipart = ( streams != null && streams.size() > 1 ); boolean isMultipart = ( streams != null && streams.size() > 1 );
if( streams == null || isMultipart ) { if (streams == null || isMultipart) {
// Without streams, just post the parameters PostMethod post = new PostMethod(url);
PostMethod post = new PostMethod( url ); post.getParams().setContentCharset("UTF-8");
if (!this.useMultiPartPost && !isMultipart) {
post.addRequestHeader("Content-Type",
"application/x-www-form-urlencoded; charset=UTF-8");
}
List<Part> parts = new LinkedList<Part>();
Iterator<String> iter = params.getParameterNamesIterator(); Iterator<String> iter = params.getParameterNamesIterator();
while( iter.hasNext() ) { while (iter.hasNext()) {
String p = iter.next(); String p = iter.next();
String[] vals = params.getParams( p ); String[] vals = params.getParams(p);
if( vals != null && vals.length > 0 ) { if (vals != null) {
for( String v : vals ) { for (String v : vals) {
post.addParameter( p, (v==null)?null:v ); if (this.useMultiPartPost || isMultipart) {
parts.add(new StringPart(p, v, "UTF-8"));
} else {
post.addParameter(p, v);
}
} }
} }
else {
post.addParameter( p, null );
}
} }
post.getParams().setContentCharset("UTF-8"); if (isMultipart) {
int i = 0;
if( isMultipart ) { for (ContentStream content : streams) {
int i=0;
Part[] parts = new Part[streams.size()];
for( ContentStream content : streams ) {
final ContentStream c = content; final ContentStream c = content;
String charSet = null; String charSet = null;
String transferEncoding = null; String transferEncoding = null;
parts[i++] = new PartBase( c.getName(), c.getContentType(), charSet, transferEncoding ) { parts.add(new PartBase(c.getName(), c.getContentType(),
charSet, transferEncoding) {
@Override @Override
protected long lengthOfData() throws IOException { protected long lengthOfData() throws IOException {
return c.getSize(); return c.getSize();
} }
@Override @Override
protected void sendData(OutputStream out) throws IOException { protected void sendData(OutputStream out)
IOUtils.copy( c.getReader(), out ); throws IOException {
IOUtils.copy(c.getReader(), out);
} }
}; });
} }
}
// Set the multi-part request if (parts.size() > 0) {
post.setRequestEntity( new MultipartRequestEntity( parts, post.getParams() ) ); post.setRequestEntity(new MultipartRequestEntity(parts
method = post; .toArray(new Part[parts.size()]), post.getParams()));
} }
method = post; method = post;