diff --git a/CHANGES.txt b/CHANGES.txt
index a545260bc0d..ec43261a216 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -131,6 +131,10 @@ New Features
18. SOLR-81: More SpellCheckerRequestHandler enhancements, inlcluding
support for relative or absolute directory path configurations, as
well as RAM based directory. (hossman)
+
+19. SOLR-197: New parameters for input: stream.contentType for specifying
+ or overriding the content type of input, and stream.file for reading
+ local files. (Ryan McKinley via yonik)
Changes in runtime behavior
1. Highlighting using DisMax will only pick up terms from the main
diff --git a/src/java/org/apache/solr/handler/DumpRequestHandler.java b/src/java/org/apache/solr/handler/DumpRequestHandler.java
index 5f344894fdb..d6d065866ca 100644
--- a/src/java/org/apache/solr/handler/DumpRequestHandler.java
+++ b/src/java/org/apache/solr/handler/DumpRequestHandler.java
@@ -18,11 +18,10 @@
package org.apache.solr.handler;
import java.io.IOException;
-import java.util.Map;
import java.util.ArrayList;
import org.apache.commons.io.IOUtils;
-import org.apache.solr.request.ContentStream;
+import org.apache.solr.util.ContentStream;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.util.NamedList;
diff --git a/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java b/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java
index add7cf821a0..de0b809af27 100644
--- a/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java
+++ b/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java
@@ -25,9 +25,10 @@ import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
+import org.apache.commons.io.IOUtils;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrException;
-import org.apache.solr.request.ContentStream;
+import org.apache.solr.util.ContentStream;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
@@ -65,19 +66,7 @@ public class XmlUpdateRequestHandler extends RequestHandlerBase
throw new RuntimeException(e);
}
}
-
- // TODO - this should be a general utility in another class
- public static String getCharsetFromContentType( String contentType )
- {
- if( contentType != null ) {
- int idx = contentType.toLowerCase().indexOf( "charset=" );
- if( idx > 0 ) {
- return contentType.substring( idx + "charset=".length() ).trim();
- }
- }
- return null;
- }
-
+
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
@@ -87,21 +76,15 @@ public class XmlUpdateRequestHandler extends RequestHandlerBase
throw new SolrException( 400, "missing content stream" );
}
-
// Cycle through each stream
for( ContentStream stream : req.getContentStreams() ) {
- String charset = getCharsetFromContentType( stream.getContentType() );
- Reader reader = null;
- if( charset == null ) {
- reader = new InputStreamReader( stream.getStream() );
+ Reader reader = stream.getReader();
+ try {
+ rsp.add( "update", this.update( reader ) );
}
- else {
- reader = new InputStreamReader( stream.getStream(), charset );
+ finally {
+ IOUtils.closeQuietly(reader);
}
- rsp.add( "update", this.update( reader ) );
-
- // Make sure its closed
- try { reader.close(); } catch( Exception ex ){}
}
}
diff --git a/src/java/org/apache/solr/request/ContentStream.java b/src/java/org/apache/solr/request/ContentStream.java
index f0441cc48d1..0115bb48e67 100644
--- a/src/java/org/apache/solr/request/ContentStream.java
+++ b/src/java/org/apache/solr/request/ContentStream.java
@@ -17,13 +17,11 @@
package org.apache.solr.request;
-import java.io.IOException;
-import java.io.InputStream;
-public interface ContentStream {
- String getName();
- String getSourceInfo();
- String getContentType();
- Long getSize(); // size if we know it, otherwise null
- InputStream getStream() throws IOException;
+@Deprecated
+public interface ContentStream extends org.apache.solr.util.ContentStream {
+ // The contentstream should go in util because it is needed for
+ // SOLR-20 and does not need any lucene specific libraries.
+ // it is new since solr 1.1 so I think we can move it without the
+ // deprication...
}
diff --git a/src/java/org/apache/solr/request/SolrParams.java b/src/java/org/apache/solr/request/SolrParams.java
index 37eecb3e35b..8fbafd81a31 100644
--- a/src/java/org/apache/solr/request/SolrParams.java
+++ b/src/java/org/apache/solr/request/SolrParams.java
@@ -115,13 +115,22 @@ public abstract class SolrParams {
*/
public static final String FACET_PREFIX = "facet.prefix";
- /** If the content stream should come from a URL */
+ /** If the content stream should come from a URL (using URLConnection) */
public static final String STREAM_URL = "stream.url";
+ /** If the content stream should come from a File (using FileReader) */
+ public static final String STREAM_FILE = "stream.file";
+
/** If the content stream should come directly from a field */
public static final String STREAM_BODY = "stream.body";
-
+ /**
+ * Explicity set the content type for the input stream
+ * If multiple streams are specified, the explicit contentType
+ * will be used for all of them.
+ */
+ public static final String STREAM_CONTENTTYPE = "stream.contentType";
+
/** returns the String value of a param, or null if not set */
public abstract String get(String param);
diff --git a/src/java/org/apache/solr/request/SolrQueryRequest.java b/src/java/org/apache/solr/request/SolrQueryRequest.java
index a268ba3f9b4..705bba9ee68 100644
--- a/src/java/org/apache/solr/request/SolrQueryRequest.java
+++ b/src/java/org/apache/solr/request/SolrQueryRequest.java
@@ -19,6 +19,7 @@ package org.apache.solr.request;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.util.ContentStream;
import org.apache.solr.core.SolrCore;
import java.util.Map;
@@ -123,3 +124,4 @@ public interface SolrQueryRequest {
+
diff --git a/src/java/org/apache/solr/request/SolrQueryRequestBase.java b/src/java/org/apache/solr/request/SolrQueryRequestBase.java
index 6c96d89cd93..24c1d6aac0b 100644
--- a/src/java/org/apache/solr/request/SolrQueryRequestBase.java
+++ b/src/java/org/apache/solr/request/SolrQueryRequestBase.java
@@ -18,6 +18,7 @@
package org.apache.solr.request;
import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.ContentStream;
import org.apache.solr.util.RefCounted;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.core.SolrCore;
diff --git a/src/java/org/apache/solr/util/ContentStream.java b/src/java/org/apache/solr/util/ContentStream.java
new file mode 100755
index 00000000000..f2006805087
--- /dev/null
+++ b/src/java/org/apache/solr/util/ContentStream.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+
+import org.apache.commons.io.IOUtils;
+
+/**
+ * @author ryan
+ * @version $Id$
+ * @since solr 1.2
+ */
+public interface ContentStream {
+ String getName();
+ String getSourceInfo();
+ String getContentType();
+
+ /**
+ * @return the stream size or null
if not known
+ */
+ Long getSize(); // size if we know it, otherwise null
+
+ /**
+ * Get an open stream. You are responsible for closing it. Consider using
+ * something like:
+ *
+ * InputStream stream = stream.getStream(); + * try { + * // use the stream... + * } + * finally { + * IOUtils.closeQuietly(reader); + * } + *+ * + * Only the first call to
getStream()
or getReader()
+ * is gaurenteed to work. The runtime behavior for aditional calls is undefined.
+ */
+ InputStream getStream() throws IOException;
+
+ /**
+ * Get an open stream. You are responsible for closing it. Consider using
+ * something like:
+ * + * Reader reader = stream.getReader(); + * try { + * // use the reader... + * } + * finally { + * IOUtils.closeQuietly(reader); + * } + *+ * + * Only the first call to
getStream()
or getReader()
+ * is gaurenteed to work. The runtime behavior for aditional calls is undefined.
+ */
+ Reader getReader() throws IOException;
+}
diff --git a/src/java/org/apache/solr/util/ContentStreamBase.java b/src/java/org/apache/solr/util/ContentStreamBase.java
new file mode 100755
index 00000000000..2263d1697c1
--- /dev/null
+++ b/src/java/org/apache/solr/util/ContentStreamBase.java
@@ -0,0 +1,183 @@
+package org.apache.solr.util;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.net.URL;
+import java.net.URLConnection;
+
+
+/**
+ * Three concrete implementations for ContentStream - one for File/URL/String
+ *
+ * @author ryan
+ * @version $Id$
+ * @since solr 1.2
+ */
+public abstract class ContentStreamBase implements ContentStream
+{
+ protected String name;
+ protected String sourceInfo;
+ protected String contentType;
+ protected Long size;
+
+ //---------------------------------------------------------------------
+ //---------------------------------------------------------------------
+
+ public static String getCharsetFromContentType( String contentType )
+ {
+ if( contentType != null ) {
+ int idx = contentType.toLowerCase().indexOf( "charset=" );
+ if( idx > 0 ) {
+ return contentType.substring( idx + "charset=".length() ).trim();
+ }
+ }
+ return null;
+ }
+
+ //------------------------------------------------------------------------
+ //------------------------------------------------------------------------
+
+ /**
+ * Construct a ContentStream
from a URL
+ *
+ * This uses a {@Link URLConnection} to get the content stream
+ */
+ public static class URLStream extends ContentStreamBase
+ {
+ private final URL url;
+ final URLConnection conn;
+
+ public URLStream( URL url ) throws IOException {
+ this.url = url;
+ this.conn = this.url.openConnection();
+
+ contentType = conn.getContentType();
+ name = url.toExternalForm();
+ size = new Long( conn.getContentLength() );
+ sourceInfo = "url";
+ }
+
+ public InputStream getStream() throws IOException {
+ return conn.getInputStream();
+ }
+ }
+
+ /**
+ * Construct a ContentStream
from a File
+ */
+ public static class FileStream extends ContentStreamBase
+ {
+ private final File file;
+
+ public FileStream( File f ) throws IOException {
+ file = f;
+
+ contentType = null; // ??
+ name = file.getName();
+ size = file.length();
+ sourceInfo = file.toURI().toString();
+ }
+
+ public InputStream getStream() throws IOException {
+ return new FileInputStream( file );
+ }
+
+ /**
+ * If an charset is defined (by the contentType) ues that, otherwise
+ * use a file reader
+ */
+ public Reader getReader() throws IOException {
+ String charset = getCharsetFromContentType( contentType );
+ return charset == null
+ ? new FileReader( file )
+ : new InputStreamReader( getStream(), charset );
+ }
+ }
+
+
+ /**
+ * Construct a ContentStream
from a File
+ */
+ public static class StringStream extends ContentStreamBase
+ {
+ private final String str;
+
+ public StringStream( String str ) {
+ this.str = str;
+
+ contentType = null;
+ name = null;
+ size = new Long( str.length() );
+ sourceInfo = "string";
+ }
+
+ public InputStream getStream() throws IOException {
+ return new ByteArrayInputStream( str.getBytes() );
+ }
+
+ /**
+ * If an charset is defined (by the contentType) ues that, otherwise
+ * use a StringReader
+ */
+ public Reader getReader() throws IOException {
+ String charset = getCharsetFromContentType( contentType );
+ return charset == null
+ ? new StringReader( str )
+ : new InputStreamReader( getStream(), charset );
+ }
+ }
+
+ /**
+ * Base reader implementation. If the contentType declares a
+ * charset use it, otherwise use the system default.
+ */
+ public Reader getReader() throws IOException {
+ String charset = getCharsetFromContentType( contentType );
+ return charset == null
+ ? new InputStreamReader( getStream() )
+ : new InputStreamReader( getStream(), charset );
+ }
+
+ //------------------------------------------------------------------
+ // Getters / Setters for overrideable attributes
+ //------------------------------------------------------------------
+
+ public String getContentType() {
+ return contentType;
+ }
+
+ public void setContentType(String contentType) {
+ this.contentType = contentType;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public Long getSize() {
+ return size;
+ }
+
+ public void setSize(Long size) {
+ this.size = size;
+ }
+
+ public String getSourceInfo() {
+ return sourceInfo;
+ }
+
+ public void setSourceInfo(String sourceInfo) {
+ this.sourceInfo = sourceInfo;
+ }
+}
diff --git a/src/test/org/apache/solr/servlet/SolrRequestParserTest.java b/src/test/org/apache/solr/servlet/SolrRequestParserTest.java
index bb582226419..e10390632b3 100644
--- a/src/test/org/apache/solr/servlet/SolrRequestParserTest.java
+++ b/src/test/org/apache/solr/servlet/SolrRequestParserTest.java
@@ -31,7 +31,7 @@ import org.apache.commons.io.IOUtils;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
-import org.apache.solr.request.ContentStream;
+import org.apache.solr.util.ContentStream;
import org.apache.solr.request.MapSolrParams;
import org.apache.solr.request.MultiMapSolrParams;
import org.apache.solr.request.SolrParams;
@@ -84,6 +84,15 @@ public class SolrRequestParserTest extends AbstractSolrTestCase {
Collections.sort( input );
Collections.sort( output );
assertEquals( input.toString(), output.toString() );
+
+ // set the contentType and make sure tat gets set
+ String ctype = "text/xxx";
+ streams = new ArrayList