From c3df2607b0acdcce287da0c8f116decad4096d3e Mon Sep 17 00:00:00 2001 From: Ryan McKinley Date: Mon, 7 May 2007 23:35:55 +0000 Subject: [PATCH] SOLR-214 - use the charset encoded in the contentType to decode the posted text. Even though they are supposed to, some containers do not obey this specification. git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@536019 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 7 +++++++ .../solr/servlet/SolrRequestParsers.java | 21 ++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index bcb0ffa26a6..d92ad600957 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -271,6 +271,13 @@ Bug Fixes 13. Changed the SOLR-104 RequestDispatcher so that /select?qt=xxx can not access handlers that start with "/". This makes path based authentication possible for path based request handlers. (ryan) + +14. SOLR-214: Some servlet containers (including Tomcat and Resin) do not + obey the specified charset. Rather than letting the container handle + it, Solr now uses the charset from the header contentType to decode posted + content. Using the contentType: "text/xml; charset=utf-8" will force + utf-8 encoding. If you do not specify a contentType, it will use the + platform default. (Koji Sekiguchi via ryan) Other Changes 1. 
Updated to Lucene 2.1 diff --git a/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java b/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java index 25d7816c50f..3615aa43a8c 100644 --- a/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java +++ b/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java @@ -20,7 +20,6 @@ package org.apache.solr.servlet; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.Reader; import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLDecoder; @@ -225,16 +224,27 @@ class RawRequestParser implements SolrRequestParser public SolrParams parseParamsAndFillStreams( final HttpServletRequest req, ArrayList streams ) throws Exception { - streams.add( new ContentStream() { + // The javadocs for HttpServletRequest are clear that req.getReader() should take + // care of any character encoding issues. BUT, there are problems while running on + // some servlet containers: including Tomcat 5 and resin. + // + // Rather than return req.getReader(), this uses the default ContentStreamBase method + // that checks for charset definitions in the ContentType. + + streams.add( new ContentStreamBase() { + @Override public String getContentType() { return req.getContentType(); } + @Override public String getName() { - return null; // Is there any meaningfull name? + return null; // Is there any meaningful name? } + @Override public String getSourceInfo() { - return null; // Is there any meaningfull name? + return null; // Is there any meaningful source? 
} + @Override public Long getSize() { String v = req.getHeader( "Content-Length" ); if( v != null ) { @@ -245,9 +255,6 @@ class RawRequestParser implements SolrRequestParser public InputStream getStream() throws IOException { return req.getInputStream(); } - public Reader getReader() throws IOException { - return req.getReader(); - } }); return SolrRequestParsers.parseQueryString( req.getQueryString() ); }