From de0dc7ecf76aebb7a598b867ad521e5e8e862b28 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Wed, 2 Jul 2008 22:23:24 +0000 Subject: [PATCH] SOLR-443: declare charset for POST body git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@673528 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 7 ++ .../solrj/impl/CommonsHttpSolrServer.java | 94 +++++++++++++------ 2 files changed, 70 insertions(+), 31 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index d85ed18f5ee..0d87320110f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -451,6 +451,13 @@ Bug Fixes 38. SOLR-584: Make stats.jsp and stats.xsl more robust. (Yousef Ourabi and hossman) +39. SOLR-443: SolrJ: Declare UTF-8 charset on POSTed parameters + to avoid problems with servlet containers that default to latin-1 + and allow switching of the exact POST mechanism for parameters + via useMultiPartPost in CommonsHttpSolrServer. + (Lars Kotthoff, Andrew Schurman, ryan, yonik) + + Other Changes 1. SOLR-135: Moved common classes to org.apache.solr.common and altered the build scripts to make two jars: apache-solr-1.3.jar and diff --git a/client/java/solrj/src/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java b/client/java/solrj/src/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java index fc64578693a..f86d3874c8d 100644 --- a/client/java/solrj/src/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java +++ b/client/java/solrj/src/org/apache/solr/client/solrj/impl/CommonsHttpSolrServer.java @@ -24,6 +24,8 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.Collection; import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; import java.util.zip.GZIPInputStream; import java.util.zip.InflaterInputStream; @@ -34,6 +36,7 @@ import org.apache.commons.httpclient.methods.PostMethod; import org.apache.commons.httpclient.methods.multipart.MultipartRequestEntity; import org.apache.commons.httpclient.methods.multipart.Part; import org.apache.commons.httpclient.methods.multipart.PartBase; +import org.apache.commons.httpclient.methods.multipart.StringPart; import org.apache.commons.io.IOUtils; import org.apache.solr.client.solrj.ResponseParser; import org.apache.solr.client.solrj.SolrRequest; @@ -69,6 +72,21 @@ public class CommonsHttpSolrServer extends SolrServer private boolean _allowCompression = false; private int _maxRetries = 0; + /** + * If set to false, add the query parameters as URL-encoded parameters to the + * POST request in a single part. If set to true, create a new part of a + * multi-part request for each parameter. + * + * The reason for adding all parameters as parts of a multi-part request is + * that this allows us to specify the charset -- standards for single-part + * requests specify that non-ASCII characters should be URL-encoded, but don't + * specify the charset of the characters to be URL-encoded (cf. + * http://www.w3.org/TR/html401/interact/forms.html#form-content-type). + * Therefore you have to rely on your servlet container doing the right thing + * with single-part requests. + */ + private boolean useMultiPartPost; + /** * @param solrServerUrl The URL of the Solr server. For * example, "http://localhost:8983/solr/" @@ -85,11 +103,15 @@ public class CommonsHttpSolrServer extends SolrServer * will use this SolrServer. */ public CommonsHttpSolrServer(String solrServerUrl, HttpClient httpClient) throws MalformedURLException { - this(new URL(solrServerUrl), httpClient, new BinaryResponseParser()); + this(new URL(solrServerUrl), httpClient, new BinaryResponseParser(), false); + } + + public CommonsHttpSolrServer(String solrServerUrl, HttpClient httpClient, boolean useMultiPartPost) throws MalformedURLException { + this(new URL(solrServerUrl), httpClient, new BinaryResponseParser(), useMultiPartPost); } public CommonsHttpSolrServer(String solrServerUrl, HttpClient httpClient, ResponseParser parser) throws MalformedURLException { - this(new URL(solrServerUrl), httpClient, parser); + this(new URL(solrServerUrl), httpClient, parser, false); } /** @@ -100,15 +122,19 @@ public class CommonsHttpSolrServer extends SolrServer */ public CommonsHttpSolrServer(URL baseURL) { - this(baseURL, null, new BinaryResponseParser()); + this(baseURL, null, new BinaryResponseParser(), false); } public CommonsHttpSolrServer(URL baseURL, HttpClient client){ - this(baseURL, client, new BinaryResponseParser()); + this(baseURL, client, new BinaryResponseParser(), false); + } + + public CommonsHttpSolrServer(URL baseURL, HttpClient client, boolean useMultiPartPost){ + this(baseURL, client, new BinaryResponseParser(), useMultiPartPost); } - public CommonsHttpSolrServer(URL baseURL, HttpClient client, ResponseParser parser) { + public CommonsHttpSolrServer(URL baseURL, HttpClient client, ResponseParser parser, boolean useMultiPartPost) { _baseURL = baseURL.toExternalForm(); if( _baseURL.endsWith( "/" ) ) { _baseURL = _baseURL.substring( 0, _baseURL.length()-1 ); @@ -129,6 +155,8 @@ public class CommonsHttpSolrServer extends SolrServer // by default use the XML one _parser = parser; + + this.useMultiPartPost = useMultiPartPost; } @@ -205,51 +233,55 @@ public class CommonsHttpSolrServer extends SolrServer String url = _baseURL + path; boolean isMultipart = ( streams != null && streams.size() > 1 ); - if( streams == null || isMultipart ) { - // Without streams, just post the parameters - PostMethod post = new PostMethod( url ); + if (streams == null || isMultipart) { + PostMethod post = new PostMethod(url); + post.getParams().setContentCharset("UTF-8"); + if (!this.useMultiPartPost && !isMultipart) { + post.addRequestHeader("Content-Type", + "application/x-www-form-urlencoded; charset=UTF-8"); + } + List parts = new LinkedList(); Iterator iter = params.getParameterNamesIterator(); - while( iter.hasNext() ) { + while (iter.hasNext()) { String p = iter.next(); - String[] vals = params.getParams( p ); - if( vals != null && vals.length > 0 ) { - for( String v : vals ) { - post.addParameter( p, (v==null)?null:v ); + String[] vals = params.getParams(p); + if (vals != null) { + for (String v : vals) { + if (this.useMultiPartPost || isMultipart) { + parts.add(new StringPart(p, v, "UTF-8")); + } else { + post.addParameter(p, v); + } } } - else { - post.addParameter( p, null ); - } } - post.getParams().setContentCharset("UTF-8"); - - if( isMultipart ) { - int i=0; - Part[] parts = new Part[streams.size()]; - - for( ContentStream content : streams ) { + if (isMultipart) { + int i = 0; + for (ContentStream content : streams) { final ContentStream c = content; String charSet = null; String transferEncoding = null; - parts[i++] = new PartBase( c.getName(), c.getContentType(), charSet, transferEncoding ) { + parts.add(new PartBase(c.getName(), c.getContentType(), + charSet, transferEncoding) { @Override protected long lengthOfData() throws IOException { return c.getSize(); } @Override - protected void sendData(OutputStream out) throws IOException { - IOUtils.copy( c.getReader(), out ); + protected void sendData(OutputStream out) + throws IOException { + IOUtils.copy(c.getReader(), out); } - }; + }); } - - // Set the multi-part request - post.setRequestEntity( new MultipartRequestEntity( parts, post.getParams() ) ); - method = post; + } + if (parts.size() > 0) { + post.setRequestEntity(new MultipartRequestEntity(parts + .toArray(new Part[parts.size()]), post.getParams())); } method = post;