SOLR-2854: Fix ExtractingRequestHandler to call getStream before getting stream attributes.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1189803 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erik Hatcher 2011-10-27 15:24:14 +00:00
parent ff3222e9ff
commit ce41a7b231
2 changed files with 13 additions and 5 deletions

View File

@ -143,10 +143,6 @@ public class ExtractingDocumentLoader extends ContentStreamLoader {
}
if (parser != null) {
Metadata metadata = new Metadata();
metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
// If you specify the resource name (the filename, roughly) with this parameter,
// then Tika can make use of it in guessing the appropriate MIME type:
@ -155,12 +151,16 @@ public class ExtractingDocumentLoader extends ContentStreamLoader {
metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName);
}
SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
InputStream inputStream = null;
try {
inputStream = stream.getStream();
metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION);
boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false);
SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
ContentHandler parsingHandler = handler;
StringWriter writer = null;

View File

@ -50,6 +50,10 @@ public interface ContentStream {
*
* Only the first call to <code>getStream()</code> or <code>getReader()</code>
* is guaranteed to work. The runtime behavior for additional calls is undefined.
*
* Note: the attributes (name, contentType, etc.) are only guaranteed to be set after
* <code>getStream()</code> or <code>getReader()</code> has been called, because a stream
* may be loaded lazily, i.e. not until one of these methods is called.
*/
InputStream getStream() throws IOException;
@ -68,6 +72,10 @@ public interface ContentStream {
*
* Only the first call to <code>getStream()</code> or <code>getReader()</code>
* is guaranteed to work. The runtime behavior for additional calls is undefined.
*
* Note: the attributes (name, contentType, etc.) are only guaranteed to be set after
* <code>getStream()</code> or <code>getReader()</code> has been called, because a stream
* may be loaded lazily, i.e. not until one of these methods is called.
*/
Reader getReader() throws IOException;
}