mirror of https://github.com/apache/lucene.git
SOLR-2346: Add a chance to set content encoding explicitly via content type of stream.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1225120 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7c7c7bd077
commit
21822811a9
|
@ -30,7 +30,9 @@ $Id$
|
|||
|
||||
================== Release 3.6.0 ==================
|
||||
|
||||
(No Changes)
|
||||
* SOLR-2346: Add a chance to set content encoding explicitly via content type of stream.
|
||||
This is convenient when Tika's auto detector cannot detect encoding, especially
|
||||
the text file is too short to detect encoding. (koji)
|
||||
|
||||
================== Release 3.5.0 ==================
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.solr.common.SolrException;
|
|||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.UpdateParams;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.handler.ContentStreamLoader;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
|
@ -158,6 +159,12 @@ public class ExtractingDocumentLoader extends ContentStreamLoader {
|
|||
metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
|
||||
metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
|
||||
metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
|
||||
// HtmlParser and TXTParser regard Metadata.CONTENT_ENCODING in metadata
|
||||
String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
|
||||
if(charset != null){
|
||||
metadata.add(Metadata.CONTENT_ENCODING, charset);
|
||||
}
|
||||
|
||||
String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION);
|
||||
boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false);
|
||||
SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
|
||||
|
|
Loading…
Reference in New Issue