SOLR-214 - use the charset encoded in the contentType to decode the posted text. Even though they are supposed to, some containers do not obey this specification.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@536019 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ryan McKinley 2007-05-07 23:35:55 +00:00
parent a461ab4cc5
commit c3df2607b0
2 changed files with 21 additions and 7 deletions

View File

@ -271,6 +271,13 @@ Bug Fixes
13. Changed the SOLR-104 RequestDispatcher so that /select?qt=xxx can not 13. Changed the SOLR-104 RequestDispatcher so that /select?qt=xxx can not
access handlers that start with "/". This makes path based authentication access handlers that start with "/". This makes path based authentication
possible for path based request handlers. (ryan) possible for path based request handlers. (ryan)
14. SOLR-214: Some servlet containers (including Tomcat and Resin) do not
obey the specified charset. Rather than letting the container handle
it, Solr now uses the charset from the header contentType to decode posted
content. Using the contentType: "text/xml; charset=utf-8" will force
utf-8 encoding. If you do not specify a contentType, it will use the
platform default. (Koji Sekiguchi via ryan)
Other Changes Other Changes
1. Updated to Lucene 2.1 1. Updated to Lucene 2.1

View File

@ -20,7 +20,6 @@ package org.apache.solr.servlet;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.Reader;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.URL; import java.net.URL;
import java.net.URLDecoder; import java.net.URLDecoder;
@ -225,16 +224,27 @@ class RawRequestParser implements SolrRequestParser
public SolrParams parseParamsAndFillStreams( public SolrParams parseParamsAndFillStreams(
final HttpServletRequest req, ArrayList<ContentStream> streams ) throws Exception final HttpServletRequest req, ArrayList<ContentStream> streams ) throws Exception
{ {
streams.add( new ContentStream() { // The javadocs for HttpServletRequest are clear that req.getReader() should take
// care of any character encoding issues. BUT, there are problems while running on
// some servlet containers: including Tomcat 5 and resin.
//
// Rather than return req.getReader(), this uses the default ContentStreamBase method
// that checks for charset definitions in the ContentType.
streams.add( new ContentStreamBase() {
@Override
public String getContentType() { public String getContentType() {
return req.getContentType(); return req.getContentType();
} }
@Override
public String getName() { public String getName() {
return null; // Is there any meaningfull name? return null; // Is there any meaningful name?
} }
@Override
public String getSourceInfo() { public String getSourceInfo() {
return null; // Is there any meaningfull name? return null; // Is there any meaningful source?
} }
@Override
public Long getSize() { public Long getSize() {
String v = req.getHeader( "Content-Length" ); String v = req.getHeader( "Content-Length" );
if( v != null ) { if( v != null ) {
@ -245,9 +255,6 @@ class RawRequestParser implements SolrRequestParser
public InputStream getStream() throws IOException { public InputStream getStream() throws IOException {
return req.getInputStream(); return req.getInputStream();
} }
public Reader getReader() throws IOException {
return req.getReader();
}
}); });
return SolrRequestParsers.parseQueryString( req.getQueryString() ); return SolrRequestParsers.parseQueryString( req.getQueryString() );
} }