From ca0dedf563721e1bae51483bb8c12e7c728a215c Mon Sep 17 00:00:00 2001
From: Ryan McKinley <ryan@apache.org>
Date: Thu, 10 May 2007 22:38:10 +0000
Subject: [PATCH] SOLR-231 -- use UTF-8 encoding unless something else is
 specified.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@537024 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                   |   3 +
 .../apache/solr/util/ContentStreamBase.java   |   8 +-
 .../solr/servlet/SolrRequestParsers.java      | 110 ++++++++----------
 3 files changed, 57 insertions(+), 64 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index a927e7c2903..ee9d4fde241 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -231,6 +231,9 @@ Changes in runtime behavior
     codes.  To enable solr1.1 style /update, do not map "/update" to any 
     handler in solrconfig.xml (ryan)
 
+10. SOLR-231: If a charset is not specified in the contentType, 
+    ContentStream.getReader() will use UTF-8 encoding.  (ryan)
+
 Optimizations 
  1. SOLR-114: HashDocSet specific implementations of union() and andNot()
     for a 20x performance improvement for those set operations, and a new
diff --git a/src/java/org/apache/solr/util/ContentStreamBase.java b/src/java/org/apache/solr/util/ContentStreamBase.java
index 00a3c7c2d4e..890beb90253 100755
--- a/src/java/org/apache/solr/util/ContentStreamBase.java
+++ b/src/java/org/apache/solr/util/ContentStreamBase.java
@@ -22,6 +22,8 @@ import java.net.URLConnection;
  */
 public abstract class ContentStreamBase implements ContentStream
 {
+  public static final String DEFAULT_CHARSET = "utf-8";
+  
   protected String name;
   protected String sourceInfo;
   protected String contentType;
@@ -137,12 +139,12 @@ public abstract class ContentStreamBase implements ContentStream
 
   /**
    * Base reader implementation.  If the contentType declares a 
-   * charset use it, otherwise use the system default.
+   * charset use it, otherwise use "utf-8".
    */
   public Reader getReader() throws IOException {
-    String charset = getCharsetFromContentType( contentType );
+    String charset = getCharsetFromContentType( getContentType() );
     return charset == null 
-      ? new InputStreamReader( getStream() )
+      ? new InputStreamReader( getStream(), DEFAULT_CHARSET )
       : new InputStreamReader( getStream(), charset );
   }
 
diff --git a/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java b/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java
index 6420ae7a500..ba33a64867e 100644
--- a/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java
+++ b/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java
@@ -18,6 +18,7 @@
 package org.apache.solr.servlet;
 
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -217,6 +218,52 @@ class SimpleRequestParser implements SolrRequestParser
   }
 }
 
+/**
+ * Wrap an HttpServletRequest as a ContentStream
+ */
+class HttpRequestContentStream extends ContentStreamBase
+{
+  private final HttpServletRequest req;
+  
+  public HttpRequestContentStream( HttpServletRequest req ) throws IOException {
+    this.req = req;
+    
+    contentType = req.getContentType();
+    // name = ???
+    // sourceInfo = ???
+    
+    String v = req.getHeader( "Content-Length" );
+    if( v != null ) {
+      size = Long.valueOf( v );
+    }
+  }
+
+  public InputStream getStream() throws IOException {
+    return req.getInputStream();
+  }
+}
+
+
+/**
+ * Wrap a FileItem as a ContentStream
+ */
+class FileItemContentStream extends ContentStreamBase
+{
+  private final FileItem item;
+  
+  public FileItemContentStream( FileItem f )
+  {
+    item = f;
+    contentType = item.getContentType();
+    name = item.getName();
+    sourceInfo = item.getFieldName();
+    size = item.getSize();
+  }
+    
+  public InputStream getStream() throws IOException {
+    return item.getInputStream();
+  }
+}
 
 /**
  * The simple parser just uses the params directly
@@ -233,33 +280,7 @@ class RawRequestParser implements SolrRequestParser
     // Rather than return req.getReader(), this uses the default ContentStreamBase method
     // that checks for charset definitions in the ContentType.
     
-    streams.add( new ContentStream() {
-      public String getContentType() {
-        return req.getContentType();
-      }
-      public String getName() {
-        return null; // Is there any meaningful name?
-      }
-      public String getSourceInfo() {
-        return null; // Is there any meaningful source?
-      }
-      public Long getSize() { 
-        String v = req.getHeader( "Content-Length" );
-        if( v != null ) {
-          return Long.valueOf( v );
-        }
-        return null; 
-      }
-      public InputStream getStream() throws IOException {
-        return req.getInputStream();
-      }
-      public Reader getReader() throws IOException {
-        String charset = ContentStreamBase.getCharsetFromContentType( req.getContentType() );
-        return charset == null 
-          ? new InputStreamReader( getStream() )
-          : new InputStreamReader( getStream(), charset );
-      }
-    });
+    streams.add( new HttpRequestContentStream( req ) );
     return SolrRequestParsers.parseQueryString( req.getQueryString() );
   }
 }
@@ -317,40 +338,6 @@ class MultipartRequestParser implements SolrRequestParser
     }
     return params;
   }
-  
-  /**
-   * Wrap a FileItem as a ContentStream
-   */
-  private static class FileItemContentStream extends ContentStreamBase
-  {
-    FileItem item;
-    
-    public FileItemContentStream( FileItem f )
-    {
-      item = f;
-    }
-    
-    public String getContentType() {
-      return item.getContentType();
-    }
-    
-    public String getName() {
-      return item.getName();
-    }
-    
-    public InputStream getStream() throws IOException {
-      return item.getInputStream();
-    }
-
-    public String getSourceInfo() {
-      return item.getFieldName();
-    }
-    
-    public Long getSize()
-    {
-      return item.getSize();
-    }
-  }
 }
 
 
@@ -401,3 +388,4 @@ class StandardRequestParser implements SolrRequestParser
 
 
 
+