SOLR-231 -- use UTF-8 encoding unless something else is specified.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@537024 13f79535-47bb-0310-9956-ffa450edef68
2007-05-10 22:38:10 +00:00 · 2007-05-10 22:38:10 +00:00 · ca0dedf563
parent f4986af881
commit ca0dedf563
3 changed files with 57 additions and 64 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -231,6 +231,9 @@ Changes in runtime behavior
    codes.  To enable solr1.1 style /update, do not map "/update" to any 
    handler in solrconfig.xml (ryan)
 10. SOLR-231: If a charset is not specified in the contentType, 
    ContentStream.getReader() will use UTF-8 encoding.  (ryan)
 Optimizations 
 1. SOLR-114: HashDocSet specific implementations of union() and andNot()
    for a 20x performance improvement for those set operations, and a new
--- a/src/java/org/apache/solr/util/ContentStreamBase.java
+++ b/src/java/org/apache/solr/util/ContentStreamBase.java
@ -22,6 +22,8 @@ import java.net.URLConnection;
 */
 public abstract class ContentStreamBase implements ContentStream
 {
  public static final String DEFAULT_CHARSET = "utf-8";
  protected String name;
  protected String sourceInfo;
  protected String contentType;
@ -137,12 +139,12 @@ public abstract class ContentStreamBase implements ContentStream
  /**
   * Base reader implementation.  If the contentType declares a 
-   * charset use it, otherwise use the system default.
+   * charset use it, otherwise use "utf-8".
   */
  public Reader getReader() throws IOException {
-    String charset = getCharsetFromContentType( contentType );
+    String charset = getCharsetFromContentType( getContentType() );
    return charset == null 
-      ? new InputStreamReader( getStream() )
+      ? new InputStreamReader( getStream(), DEFAULT_CHARSET )
      : new InputStreamReader( getStream(), charset );
  }
--- a/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java
+++ b/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java
@ -18,6 +18,7 @@
 package org.apache.solr.servlet;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@ -217,6 +218,52 @@ class SimpleRequestParser implements SolrRequestParser
  }
 }
 /**
 * Wrap an HttpServletRequest as a ContentStream
 */
 class HttpRequestContentStream extends ContentStreamBase
 {
  private final HttpServletRequest req;
  public HttpRequestContentStream( HttpServletRequest req ) throws IOException {
    this.req = req;
    contentType = req.getContentType();
    // name = ???
    // sourceInfo = ???
    String v = req.getHeader( "Content-Length" );
    if( v != null ) {
      size = Long.valueOf( v );
    }
  }
  public InputStream getStream() throws IOException {
    return req.getInputStream();
  }
 }
 /**
 * Wrap a FileItem as a ContentStream
 */
 class FileItemContentStream extends ContentStreamBase
 {
  private final FileItem item;
  public FileItemContentStream( FileItem f )
  {
    item = f;
    contentType = item.getContentType();
    name = item.getName();
    sourceInfo = item.getFieldName();
    size = item.getSize();
  }
  public InputStream getStream() throws IOException {
    return item.getInputStream();
  }
 }
 /**
 * The simple parser just uses the params directly
@ -233,33 +280,7 @@ class RawRequestParser implements SolrRequestParser
    // Rather than return req.getReader(), this uses the default ContentStreamBase method
    // that checks for charset definitions in the ContentType.
-    streams.add( new ContentStream() {
+    streams.add( new HttpRequestContentStream( req ) );
      public String getContentType() {
        return req.getContentType();
      }
      public String getName() {
        return null; // Is there any meaningful name?
      }
      public String getSourceInfo() {
        return null; // Is there any meaningful source?
      }
      public Long getSize() { 
        String v = req.getHeader( "Content-Length" );
        if( v != null ) {
          return Long.valueOf( v );
        }
        return null; 
      }
      public InputStream getStream() throws IOException {
        return req.getInputStream();
      }
      public Reader getReader() throws IOException {
        String charset = ContentStreamBase.getCharsetFromContentType( req.getContentType() );
        return charset == null 
          ? new InputStreamReader( getStream() )
          : new InputStreamReader( getStream(), charset );
      }
    });
    return SolrRequestParsers.parseQueryString( req.getQueryString() );
  }
 }
@ -317,40 +338,6 @@ class MultipartRequestParser implements SolrRequestParser
    }
    return params;
  }
  /**
   * Wrap a FileItem as a ContentStream
   */
  private static class FileItemContentStream extends ContentStreamBase
  {
    FileItem item;
    public FileItemContentStream( FileItem f )
    {
      item = f;
    }
    public String getContentType() {
      return item.getContentType();
    }
    public String getName() {
      return item.getName();
    }
    public InputStream getStream() throws IOException {
      return item.getInputStream();
    }
    public String getSourceInfo() {
      return item.getFieldName();
    }
    public Long getSize()
    {
      return item.getSize();
    }
  }
 }
@ -401,3 +388,4 @@ class StandardRequestParser implements SolrRequestParser