From ce41a7b23187465e548305c6a3d5334189aaab46 Mon Sep 17 00:00:00 2001 From: Erik Hatcher Date: Thu, 27 Oct 2011 15:24:14 +0000 Subject: [PATCH] SOLR-2854: Fix ExtractingRequestHandler to call getStream before getting stream attributes. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1189803 13f79535-47bb-0310-9956-ffa450edef68 --- .../handler/extraction/ExtractingDocumentLoader.java | 10 +++++----- .../org/apache/solr/common/util/ContentStream.java | 8 ++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java index 62a42c8c998..bbc3ba97fb3 100644 --- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java +++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java @@ -143,10 +143,6 @@ public class ExtractingDocumentLoader extends ContentStreamLoader { } if (parser != null) { Metadata metadata = new Metadata(); - metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName()); - metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo()); - metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize())); - metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType()); // If you specify the resource name (the filename, roughly) with this parameter, // then Tika can make use of it in guessing the appropriate MIME type: @@ -155,12 +151,16 @@ public class ExtractingDocumentLoader extends ContentStreamLoader { metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName); } - SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema); InputStream inputStream = null; try { inputStream = stream.getStream(); + metadata.add(ExtractingMetadataConstants.STREAM_NAME, 
stream.getName()); + metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo()); + metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize())); + metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType()); String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION); boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false); + SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema); ContentHandler parsingHandler = handler; StringWriter writer = null; diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java b/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java index de8d48bcf50..d416c5c7376 100755 --- a/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java @@ -50,6 +50,10 @@ public interface ContentStream { * * Only the first call to getStream() or getReader() * is guaranteed to work. The runtime behavior for additional calls is undefined. + * + * Note: you must call getStream() or getReader() before + * the attributes (name, contentType, etc.) are guaranteed to be set. Streams may be + * lazily loaded, i.e. loaded only when this method is called. */ InputStream getStream() throws IOException; @@ -68,6 +72,10 @@ public interface ContentStream { * * Only the first call to getStream() or getReader() * is guaranteed to work. The runtime behavior for additional calls is undefined. + * + * Note: you must call getStream() or getReader() before + * the attributes (name, contentType, etc.) are guaranteed to be set. Streams may be + * lazily loaded, i.e. loaded only when this method is called. */ Reader getReader() throws IOException; }