mirror of https://github.com/apache/lucene.git
SOLR-2854: Fix ExtractingRequestHandler to call getStream before getting stream attributes.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1189803 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ff3222e9ff
commit
ce41a7b231
|
@ -143,10 +143,6 @@ public class ExtractingDocumentLoader extends ContentStreamLoader {
|
||||||
}
|
}
|
||||||
if (parser != null) {
|
if (parser != null) {
|
||||||
Metadata metadata = new Metadata();
|
Metadata metadata = new Metadata();
|
||||||
metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
|
|
||||||
metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
|
|
||||||
metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
|
|
||||||
metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
|
|
||||||
|
|
||||||
// If you specify the resource name (the filename, roughly) with this parameter,
|
// If you specify the resource name (the filename, roughly) with this parameter,
|
||||||
// then Tika can make use of it in guessing the appropriate MIME type:
|
// then Tika can make use of it in guessing the appropriate MIME type:
|
||||||
|
@ -155,12 +151,16 @@ public class ExtractingDocumentLoader extends ContentStreamLoader {
|
||||||
metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName);
|
metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName);
|
||||||
}
|
}
|
||||||
|
|
||||||
SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
|
|
||||||
InputStream inputStream = null;
|
InputStream inputStream = null;
|
||||||
try {
|
try {
|
||||||
inputStream = stream.getStream();
|
inputStream = stream.getStream();
|
||||||
|
metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
|
||||||
|
metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
|
||||||
|
metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
|
||||||
|
metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
|
||||||
String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION);
|
String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION);
|
||||||
boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false);
|
boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false);
|
||||||
|
SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
|
||||||
ContentHandler parsingHandler = handler;
|
ContentHandler parsingHandler = handler;
|
||||||
|
|
||||||
StringWriter writer = null;
|
StringWriter writer = null;
|
||||||
|
|
|
@ -50,6 +50,10 @@ public interface ContentStream {
|
||||||
*
|
*
|
||||||
* Only the first call to <code>getStream()</code> or <code>getReader()</code>
|
* Only the first call to <code>getStream()</code> or <code>getReader()</code>
|
||||||
* is guaranteed to work. The runtime behavior for additional calls is undefined.
|
* is guaranteed to work. The runtime behavior for additional calls is undefined.
|
||||||
|
*
|
||||||
|
* Note: you must call <code>getStream()</code> or <code>getReader()</code> before
|
||||||
|
* the attributes (name, contentType, etc) are guaranteed to be set. Streams may be
|
||||||
|
* loaded lazily, i.e. only when this method is called.
|
||||||
*/
|
*/
|
||||||
InputStream getStream() throws IOException;
|
InputStream getStream() throws IOException;
|
||||||
|
|
||||||
|
@ -68,6 +72,10 @@ public interface ContentStream {
|
||||||
*
|
*
|
||||||
* Only the first call to <code>getStream()</code> or <code>getReader()</code>
|
* Only the first call to <code>getStream()</code> or <code>getReader()</code>
|
||||||
* is guaranteed to work. The runtime behavior for additional calls is undefined.
|
* is guaranteed to work. The runtime behavior for additional calls is undefined.
|
||||||
|
*
|
||||||
|
* Note: you must call <code>getStream()</code> or <code>getReader()</code> before
|
||||||
|
* the attributes (name, contentType, etc) are guaranteed to be set. Streams may be
|
||||||
|
* loaded lazily, i.e. only when this method is called.
|
||||||
*/
|
*/
|
||||||
Reader getReader() throws IOException;
|
Reader getReader() throws IOException;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue