SOLR-231 -- use UTF-8 encoding unless something else is specified.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@537024 13f79535-47bb-0310-9956-ffa450edef68
2007-05-10 22:38:10 +00:00 · 2007-05-10 22:38:10 +00:00 · ca0dedf563
parent f4986af881
commit ca0dedf563
3 changed files with 57 additions and 64 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -231,6 +231,9 @@ Changes in runtime behavior
    codes.  To enable solr1.1 style /update, do not map "/update" to any 
    handler in solrconfig.xml (ryan)

+10. SOLR-231: If a charset is not specified in the contentType, 
+    ContentStream.getReader() will use UTF-8 encoding.  (ryan)
+
 Optimizations 
 1. SOLR-114: HashDocSet specific implementations of union() and andNot()
    for a 20x performance improvement for those set operations, and a new
--- a/src/java/org/apache/solr/util/ContentStreamBase.java
+++ b/src/java/org/apache/solr/util/ContentStreamBase.java
@ -22,6 +22,8 @@ import java.net.URLConnection;
 */
 public abstract class ContentStreamBase implements ContentStream
 {
+  public static final String DEFAULT_CHARSET = "utf-8";
+  
  protected String name;
  protected String sourceInfo;
  protected String contentType;
@ -137,12 +139,12 @@ public abstract class ContentStreamBase implements ContentStream

  /**
   * Base reader implementation.  If the contentType declares a 
-   * charset use it, otherwise use the system default.
+   * charset use it, otherwise use "utf-8".
   */
  public Reader getReader() throws IOException {
-    String charset = getCharsetFromContentType( contentType );
+    String charset = getCharsetFromContentType( getContentType() );
    return charset == null 
-      ? new InputStreamReader( getStream() )
+      ? new InputStreamReader( getStream(), DEFAULT_CHARSET )
      : new InputStreamReader( getStream(), charset );
  }

--- a/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java
+++ b/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java
@ -18,6 +18,7 @@
 package org.apache.solr.servlet;

 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@ -217,6 +218,52 @@ class SimpleRequestParser implements SolrRequestParser
  }
 }

+/**
+ * Wrap an HttpServletRequest as a ContentStream
+ */
+class HttpRequestContentStream extends ContentStreamBase
+{
+  private final HttpServletRequest req;
+  
+  public HttpRequestContentStream( HttpServletRequest req ) throws IOException {
+    this.req = req;
+    
+    contentType = req.getContentType();
+    // name = ???
+    // sourceInfo = ???
+    
+    String v = req.getHeader( "Content-Length" );
+    if( v != null ) {
+      size = Long.valueOf( v );
+    }
+  }
+
+  public InputStream getStream() throws IOException {
+    return req.getInputStream();
+  }
+}
+
+
+/**
+ * Wrap a FileItem as a ContentStream
+ */
+class FileItemContentStream extends ContentStreamBase
+{
+  private final FileItem item;
+  
+  public FileItemContentStream( FileItem f )
+  {
+    item = f;
+    contentType = item.getContentType();
+    name = item.getName();
+    sourceInfo = item.getFieldName();
+    size = item.getSize();
+  }
+    
+  public InputStream getStream() throws IOException {
+    return item.getInputStream();
+  }
+}

 /**
 * The simple parser just uses the params directly
@ -233,33 +280,7 @@ class RawRequestParser implements SolrRequestParser
    // Rather than return req.getReader(), this uses the default ContentStreamBase method
    // that checks for charset definitions in the ContentType.
    
-    streams.add( new ContentStream() {
-      public String getContentType() {
-        return req.getContentType();
-      }
-      public String getName() {
-        return null; // Is there any meaningful name?
-      }
-      public String getSourceInfo() {
-        return null; // Is there any meaningful source?
-      }
-      public Long getSize() { 
-        String v = req.getHeader( "Content-Length" );
-        if( v != null ) {
-          return Long.valueOf( v );
-        }
-        return null; 
-      }
-      public InputStream getStream() throws IOException {
-        return req.getInputStream();
-      }
-      public Reader getReader() throws IOException {
-        String charset = ContentStreamBase.getCharsetFromContentType( req.getContentType() );
-        return charset == null 
-          ? new InputStreamReader( getStream() )
-          : new InputStreamReader( getStream(), charset );
-      }
-    });
+    streams.add( new HttpRequestContentStream( req ) );
    return SolrRequestParsers.parseQueryString( req.getQueryString() );
  }
 }
@ -317,40 +338,6 @@ class MultipartRequestParser implements SolrRequestParser
    }
    return params;
  }
-  
-  /**
-   * Wrap a FileItem as a ContentStream
-   */
-  private static class FileItemContentStream extends ContentStreamBase
-  {
-    FileItem item;
-    
-    public FileItemContentStream( FileItem f )
-    {
-      item = f;
-    }
-    
-    public String getContentType() {
-      return item.getContentType();
-    }
-    
-    public String getName() {
-      return item.getName();
-    }
-    
-    public InputStream getStream() throws IOException {
-      return item.getInputStream();
-    }
-
-    public String getSourceInfo() {
-      return item.getFieldName();
-    }
-    
-    public Long getSize()
-    {
-      return item.getSize();
-    }
-  }
 }


@ -401,3 +388,4 @@ class StandardRequestParser implements SolrRequestParser



+