SOLR-231 -- use UTF-8 encoding unless something else is specified.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@537024 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ryan McKinley 2007-05-10 22:38:10 +00:00
parent f4986af881
commit ca0dedf563
3 changed files with 57 additions and 64 deletions

View File

@ -231,6 +231,9 @@ Changes in runtime behavior
codes. To enable solr1.1 style /update, do not map "/update" to any codes. To enable solr1.1 style /update, do not map "/update" to any
handler in solrconfig.xml (ryan) handler in solrconfig.xml (ryan)
10. SOLR-231: If a charset is not specified in the contentType,
ContentStream.getReader() will use UTF-8 encoding. (ryan)
Optimizations Optimizations
1. SOLR-114: HashDocSet specific implementations of union() and andNot() 1. SOLR-114: HashDocSet specific implementations of union() and andNot()
for a 20x performance improvement for those set operations, and a new for a 20x performance improvement for those set operations, and a new

View File

@ -22,6 +22,8 @@ import java.net.URLConnection;
*/ */
public abstract class ContentStreamBase implements ContentStream public abstract class ContentStreamBase implements ContentStream
{ {
public static final String DEFAULT_CHARSET = "utf-8";
protected String name; protected String name;
protected String sourceInfo; protected String sourceInfo;
protected String contentType; protected String contentType;
@ -137,12 +139,12 @@ public abstract class ContentStreamBase implements ContentStream
/** /**
* Base reader implementation. If the contentType declares a * Base reader implementation. If the contentType declares a
* charset use it, otherwise use the system default. * charset use it, otherwise use "utf-8".
*/ */
public Reader getReader() throws IOException { public Reader getReader() throws IOException {
String charset = getCharsetFromContentType( contentType ); String charset = getCharsetFromContentType( getContentType() );
return charset == null return charset == null
? new InputStreamReader( getStream() ) ? new InputStreamReader( getStream(), DEFAULT_CHARSET )
: new InputStreamReader( getStream(), charset ); : new InputStreamReader( getStream(), charset );
} }

View File

@ -18,6 +18,7 @@
package org.apache.solr.servlet; package org.apache.solr.servlet;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
@ -217,6 +218,52 @@ class SimpleRequestParser implements SolrRequestParser
} }
} }
/**
 * Wrap an HttpServletRequest as a ContentStream.
 *
 * The content type is taken from the request, and the size (when it can be
 * determined) from the "Content-Length" header. The name and source info
 * are not populated by this class.
 */
class HttpRequestContentStream extends ContentStreamBase
{
  private final HttpServletRequest req;

  /**
   * @param req the request whose body is exposed through {@link #getStream()}
   * @throws IOException declared for callers; nothing in this constructor
   *         reads the request body
   */
  public HttpRequestContentStream( HttpServletRequest req ) throws IOException {
    this.req = req;

    contentType = req.getContentType();
    // name = ???
    // sourceInfo = ???

    // Only overwrite the inherited size when the header yields a usable value.
    Long declared = parseContentLength( req.getHeader( "Content-Length" ) );
    if( declared != null ) {
      size = declared;
    }
  }

  /**
   * Parse a Content-Length header value.
   *
   * @param header the raw header value, may be null
   * @return the parsed length, or null when the header is absent or is not
   *         a valid decimal long (a malformed header is treated as
   *         "size unknown" rather than failing the whole request)
   */
  private static Long parseContentLength( String header ) {
    if( header == null ) {
      return null;
    }
    try {
      // trim(): Long.valueOf rejects surrounding whitespace
      return Long.valueOf( header.trim() );
    }
    catch( NumberFormatException ignored ) {
      // The size is advisory only; fall back to "unknown".
      return null;
    }
  }

  /**
   * @return the input stream obtained from the wrapped request
   */
  public InputStream getStream() throws IOException {
    return req.getInputStream();
  }
}
/**
 * ContentStream backed by a single {@link FileItem}.
 *
 * The descriptive fields inherited from ContentStreamBase are filled in once,
 * at construction time, from the wrapped item; the stream itself is obtained
 * from the item on each call to {@link #getStream()}.
 */
class FileItemContentStream extends ContentStreamBase
{
  private final FileItem item;

  /**
   * @param f the item to expose as a content stream
   */
  public FileItemContentStream( FileItem f )
  {
    this.item = f;

    // Copy the item's metadata into the base-class fields.
    this.contentType = f.getContentType();
    this.name = f.getName();
    this.sourceInfo = f.getFieldName();
    this.size = f.getSize();
  }

  /**
   * @return the input stream supplied by the wrapped item
   */
  public InputStream getStream() throws IOException {
    final InputStream in = item.getInputStream();
    return in;
  }
}
/** /**
* The simple parser just uses the params directly * The simple parser just uses the params directly
@ -233,33 +280,7 @@ class RawRequestParser implements SolrRequestParser
// Rather than return req.getReader(), this uses the default ContentStreamBase method // Rather than return req.getReader(), this uses the default ContentStreamBase method
// that checks for charset definitions in the ContentType. // that checks for charset definitions in the ContentType.
streams.add( new ContentStream() { streams.add( new HttpRequestContentStream( req ) );
public String getContentType() {
return req.getContentType();
}
public String getName() {
return null; // Is there any meaningful name?
}
public String getSourceInfo() {
return null; // Is there any meaningful source?
}
public Long getSize() {
String v = req.getHeader( "Content-Length" );
if( v != null ) {
return Long.valueOf( v );
}
return null;
}
public InputStream getStream() throws IOException {
return req.getInputStream();
}
public Reader getReader() throws IOException {
String charset = ContentStreamBase.getCharsetFromContentType( req.getContentType() );
return charset == null
? new InputStreamReader( getStream() )
: new InputStreamReader( getStream(), charset );
}
});
return SolrRequestParsers.parseQueryString( req.getQueryString() ); return SolrRequestParsers.parseQueryString( req.getQueryString() );
} }
} }
@ -317,40 +338,6 @@ class MultipartRequestParser implements SolrRequestParser
} }
return params; return params;
} }
/**
 * ContentStream view over a single {@link FileItem}; every accessor
 * delegates directly to the wrapped item.
 */
private static class FileItemContentStream extends ContentStreamBase
{
  FileItem item;

  /**
   * @param f the item to wrap
   */
  public FileItemContentStream( FileItem f )
  {
    this.item = f;
  }

  /** @return the input stream supplied by the item */
  public InputStream getStream() throws IOException {
    final InputStream in = this.item.getInputStream();
    return in;
  }

  /** @return the content type reported by the item */
  public String getContentType() {
    final String type = this.item.getContentType();
    return type;
  }

  /** @return the name reported by the item */
  public String getName() {
    final String itemName = this.item.getName();
    return itemName;
  }

  /** @return the item's field name, used as the source info */
  public String getSourceInfo() {
    final String field = this.item.getFieldName();
    return field;
  }

  /** @return the size reported by the item */
  public Long getSize()
  {
    return this.item.getSize();
  }
}
} }
@ -401,3 +388,4 @@ class StandardRequestParser implements SolrRequestParser