From ca0dedf563721e1bae51483bb8c12e7c728a215c Mon Sep 17 00:00:00 2001 From: Ryan McKinley Date: Thu, 10 May 2007 22:38:10 +0000 Subject: [PATCH] SOLR-231 -- use UTF-8 encoding unless something else is specified. git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@537024 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 3 + .../apache/solr/util/ContentStreamBase.java | 8 +- .../solr/servlet/SolrRequestParsers.java | 110 ++++++++---------- 3 files changed, 57 insertions(+), 64 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index a927e7c2903..ee9d4fde241 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -231,6 +231,9 @@ Changes in runtime behavior codes. To enable solr1.1 style /update, do not map "/update" to any handler in solrconfig.xml (ryan) +10. SOLR-231: If a charset is not specified in the contentType, + ContentStream.getReader() will use UTF-8 encoding. (ryan) + Optimizations 1. SOLR-114: HashDocSet specific implementations of union() and andNot() for a 20x performance improvement for those set operations, and a new diff --git a/src/java/org/apache/solr/util/ContentStreamBase.java b/src/java/org/apache/solr/util/ContentStreamBase.java index 00a3c7c2d4e..890beb90253 100755 --- a/src/java/org/apache/solr/util/ContentStreamBase.java +++ b/src/java/org/apache/solr/util/ContentStreamBase.java @@ -22,6 +22,8 @@ import java.net.URLConnection; */ public abstract class ContentStreamBase implements ContentStream { + public static final String DEFAULT_CHARSET = "utf-8"; + protected String name; protected String sourceInfo; protected String contentType; @@ -137,12 +139,12 @@ public abstract class ContentStreamBase implements ContentStream /** * Base reader implementation. If the contentType declares a - * charset use it, otherwise use the system default. + * charset use it, otherwise use "utf-8". */ public Reader getReader() throws IOException { - String charset = getCharsetFromContentType( contentType ); + String charset = getCharsetFromContentType( getContentType() ); return charset == null - ? new InputStreamReader( getStream() ) + ? new InputStreamReader( getStream(), DEFAULT_CHARSET ) : new InputStreamReader( getStream(), charset ); } diff --git a/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java b/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java index 6420ae7a500..ba33a64867e 100644 --- a/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java +++ b/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java @@ -18,6 +18,7 @@ package org.apache.solr.servlet; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -217,6 +218,52 @@ class SimpleRequestParser implements SolrRequestParser } } +/** + * Wrap an HttpServletRequest as a ContentStream + */ +class HttpRequestContentStream extends ContentStreamBase +{ + private final HttpServletRequest req; + + public HttpRequestContentStream( HttpServletRequest req ) throws IOException { + this.req = req; + + contentType = req.getContentType(); + // name = ??? + // sourceInfo = ??? + + String v = req.getHeader( "Content-Length" ); + if( v != null ) { + size = Long.valueOf( v ); + } + } + + public InputStream getStream() throws IOException { + return req.getInputStream(); + } +} + + +/** + * Wrap a FileItem as a ContentStream + */ +class FileItemContentStream extends ContentStreamBase +{ + private final FileItem item; + + public FileItemContentStream( FileItem f ) + { + item = f; + contentType = item.getContentType(); + name = item.getName(); + sourceInfo = item.getFieldName(); + size = item.getSize(); + } + + public InputStream getStream() throws IOException { + return item.getInputStream(); + } +} /** * The simple parser just uses the params directly @@ -233,33 +280,7 @@ class RawRequestParser implements SolrRequestParser // Rather than return req.getReader(), this uses the default ContentStreamBase method // that checks for charset definitions in the ContentType. - streams.add( new ContentStream() { - public String getContentType() { - return req.getContentType(); - } - public String getName() { - return null; // Is there any meaningful name? - } - public String getSourceInfo() { - return null; // Is there any meaningful source? - } - public Long getSize() { - String v = req.getHeader( "Content-Length" ); - if( v != null ) { - return Long.valueOf( v ); - } - return null; - } - public InputStream getStream() throws IOException { - return req.getInputStream(); - } - public Reader getReader() throws IOException { - String charset = ContentStreamBase.getCharsetFromContentType( req.getContentType() ); - return charset == null - ? new InputStreamReader( getStream() ) - : new InputStreamReader( getStream(), charset ); - } - }); + streams.add( new HttpRequestContentStream( req ) ); return SolrRequestParsers.parseQueryString( req.getQueryString() ); } } @@ -317,40 +338,6 @@ class MultipartRequestParser implements SolrRequestParser } return params; } - - /** - * Wrap a FileItem as a ContentStream - */ - private static class FileItemContentStream extends ContentStreamBase - { - FileItem item; - - public FileItemContentStream( FileItem f ) - { - item = f; - } - - public String getContentType() { - return item.getContentType(); - } - - public String getName() { - return item.getName(); - } - - public InputStream getStream() throws IOException { - return item.getInputStream(); - } - - public String getSourceInfo() { - return item.getFieldName(); - } - - public Long getSize() - { - return item.getSize(); - } - } } @@ -401,3 +388,4 @@ class StandardRequestParser implements SolrRequestParser +