diff --git a/CHANGES.txt b/CHANGES.txt index a545260bc0d..ec43261a216 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -131,6 +131,10 @@ New Features 18. SOLR-81: More SpellCheckerRequestHandler enhancements, inlcluding support for relative or absolute directory path configurations, as well as RAM based directory. (hossman) + +19. SOLR-197: New parameters for input: stream.contentType for specifying + or overriding the content type of input, and stream.file for reading + local files. (Ryan McKinley via yonik) Changes in runtime behavior 1. Highlighting using DisMax will only pick up terms from the main diff --git a/src/java/org/apache/solr/handler/DumpRequestHandler.java b/src/java/org/apache/solr/handler/DumpRequestHandler.java index 5f344894fdb..d6d065866ca 100644 --- a/src/java/org/apache/solr/handler/DumpRequestHandler.java +++ b/src/java/org/apache/solr/handler/DumpRequestHandler.java @@ -18,11 +18,10 @@ package org.apache.solr.handler; import java.io.IOException; -import java.util.Map; import java.util.ArrayList; import org.apache.commons.io.IOUtils; -import org.apache.solr.request.ContentStream; +import org.apache.solr.util.ContentStream; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryResponse; import org.apache.solr.util.NamedList; diff --git a/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java b/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java index add7cf821a0..de0b809af27 100644 --- a/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java +++ b/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java @@ -25,9 +25,10 @@ import java.util.ArrayList; import java.util.List; import java.util.logging.Logger; +import org.apache.commons.io.IOUtils; import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrException; -import org.apache.solr.request.ContentStream; +import org.apache.solr.util.ContentStream; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryResponse; import org.apache.solr.schema.IndexSchema; @@ -65,19 +66,7 @@ public class XmlUpdateRequestHandler extends RequestHandlerBase throw new RuntimeException(e); } } - - // TODO - this should be a general utility in another class - public static String getCharsetFromContentType( String contentType ) - { - if( contentType != null ) { - int idx = contentType.toLowerCase().indexOf( "charset=" ); - if( idx > 0 ) { - return contentType.substring( idx + "charset=".length() ).trim(); - } - } - return null; - } - + @Override public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception @@ -87,21 +76,15 @@ public class XmlUpdateRequestHandler extends RequestHandlerBase throw new SolrException( 400, "missing content stream" ); } - // Cycle through each stream for( ContentStream stream : req.getContentStreams() ) { - String charset = getCharsetFromContentType( stream.getContentType() ); - Reader reader = null; - if( charset == null ) { - reader = new InputStreamReader( stream.getStream() ); + Reader reader = stream.getReader(); + try { + rsp.add( "update", this.update( reader ) ); } - else { - reader = new InputStreamReader( stream.getStream(), charset ); + finally { + IOUtils.closeQuietly(reader); } - rsp.add( "update", this.update( reader ) ); - - // Make sure its closed - try { reader.close(); } catch( Exception ex ){} } } diff --git a/src/java/org/apache/solr/request/ContentStream.java b/src/java/org/apache/solr/request/ContentStream.java index f0441cc48d1..0115bb48e67 100644 --- a/src/java/org/apache/solr/request/ContentStream.java +++ b/src/java/org/apache/solr/request/ContentStream.java @@ -17,13 +17,11 @@ package org.apache.solr.request; -import java.io.IOException; -import java.io.InputStream; -public interface ContentStream { - String getName(); - String getSourceInfo(); - String getContentType(); - Long getSize(); // size if we know it, otherwise null - InputStream getStream() throws IOException; +@Deprecated +public interface ContentStream extends org.apache.solr.util.ContentStream { + // The contentstream should go in util because it is needed for + // SOLR-20 and does not need any lucene specific libraries. + // it is new since solr 1.1 so I think we can move it without the + // deprication... } diff --git a/src/java/org/apache/solr/request/SolrParams.java b/src/java/org/apache/solr/request/SolrParams.java index 37eecb3e35b..8fbafd81a31 100644 --- a/src/java/org/apache/solr/request/SolrParams.java +++ b/src/java/org/apache/solr/request/SolrParams.java @@ -115,13 +115,22 @@ public abstract class SolrParams { */ public static final String FACET_PREFIX = "facet.prefix"; - /** If the content stream should come from a URL */ + /** If the content stream should come from a URL (using URLConnection) */ public static final String STREAM_URL = "stream.url"; + /** If the content stream should come from a File (using FileReader) */ + public static final String STREAM_FILE = "stream.file"; + /** If the content stream should come directly from a field */ public static final String STREAM_BODY = "stream.body"; - + /** + * Explicity set the content type for the input stream + * If multiple streams are specified, the explicit contentType + * will be used for all of them. + */ + public static final String STREAM_CONTENTTYPE = "stream.contentType"; + /** returns the String value of a param, or null if not set */ public abstract String get(String param); diff --git a/src/java/org/apache/solr/request/SolrQueryRequest.java b/src/java/org/apache/solr/request/SolrQueryRequest.java index a268ba3f9b4..705bba9ee68 100644 --- a/src/java/org/apache/solr/request/SolrQueryRequest.java +++ b/src/java/org/apache/solr/request/SolrQueryRequest.java @@ -19,6 +19,7 @@ package org.apache.solr.request; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.schema.IndexSchema; +import org.apache.solr.util.ContentStream; import org.apache.solr.core.SolrCore; import java.util.Map; @@ -123,3 +124,4 @@ public interface SolrQueryRequest { + diff --git a/src/java/org/apache/solr/request/SolrQueryRequestBase.java b/src/java/org/apache/solr/request/SolrQueryRequestBase.java index 6c96d89cd93..24c1d6aac0b 100644 --- a/src/java/org/apache/solr/request/SolrQueryRequestBase.java +++ b/src/java/org/apache/solr/request/SolrQueryRequestBase.java @@ -18,6 +18,7 @@ package org.apache.solr.request; import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.ContentStream; import org.apache.solr.util.RefCounted; import org.apache.solr.schema.IndexSchema; import org.apache.solr.core.SolrCore; diff --git a/src/java/org/apache/solr/util/ContentStream.java b/src/java/org/apache/solr/util/ContentStream.java new file mode 100755 index 00000000000..f2006805087 --- /dev/null +++ b/src/java/org/apache/solr/util/ContentStream.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util; + +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; + +import org.apache.commons.io.IOUtils; + +/** + * @author ryan + * @version $Id$ + * @since solr 1.2 + */ +public interface ContentStream { + String getName(); + String getSourceInfo(); + String getContentType(); + + /** + * @return the stream size or null if not known + */ + Long getSize(); // size if we know it, otherwise null + + /** + * Get an open stream. You are responsible for closing it. Consider using + * something like: + *
+   *   InputStream stream = stream.getStream();
+   *   try {
+   *     // use the stream...
+   *   }
+   *   finally {
+   *     IOUtils.closeQuietly(reader);
+   *   }
+   *  
+ * + * Only the first call to getStream() or getReader() + * is gaurenteed to work. The runtime behavior for aditional calls is undefined. + */ + InputStream getStream() throws IOException; + + /** + * Get an open stream. You are responsible for closing it. Consider using + * something like: + *
+   *   Reader reader = stream.getReader();
+   *   try {
+   *     // use the reader...
+   *   }
+   *   finally {
+   *     IOUtils.closeQuietly(reader);
+   *   }
+   *  
+ * + * Only the first call to getStream() or getReader() + * is gaurenteed to work. The runtime behavior for aditional calls is undefined. + */ + Reader getReader() throws IOException; +} diff --git a/src/java/org/apache/solr/util/ContentStreamBase.java b/src/java/org/apache/solr/util/ContentStreamBase.java new file mode 100755 index 00000000000..2263d1697c1 --- /dev/null +++ b/src/java/org/apache/solr/util/ContentStreamBase.java @@ -0,0 +1,183 @@ +package org.apache.solr.util; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringReader; +import java.net.URL; +import java.net.URLConnection; + + +/** + * Three concrete implementations for ContentStream - one for File/URL/String + * + * @author ryan + * @version $Id$ + * @since solr 1.2 + */ +public abstract class ContentStreamBase implements ContentStream +{ + protected String name; + protected String sourceInfo; + protected String contentType; + protected Long size; + + //--------------------------------------------------------------------- + //--------------------------------------------------------------------- + + public static String getCharsetFromContentType( String contentType ) + { + if( contentType != null ) { + int idx = contentType.toLowerCase().indexOf( "charset=" ); + if( idx > 0 ) { + return contentType.substring( idx + "charset=".length() ).trim(); + } + } + return null; + } + + //------------------------------------------------------------------------ + //------------------------------------------------------------------------ + + /** + * Construct a ContentStream from a URL + * + * This uses a {@Link URLConnection} to get the content stream + */ + public static class URLStream extends ContentStreamBase + { + private final URL url; + final URLConnection conn; + + public URLStream( URL url ) throws IOException { + this.url = url; + this.conn = this.url.openConnection(); + + contentType = conn.getContentType(); + name = url.toExternalForm(); + size = new Long( conn.getContentLength() ); + sourceInfo = "url"; + } + + public InputStream getStream() throws IOException { + return conn.getInputStream(); + } + } + + /** + * Construct a ContentStream from a File + */ + public static class FileStream extends ContentStreamBase + { + private final File file; + + public FileStream( File f ) throws IOException { + file = f; + + contentType = null; // ?? + name = file.getName(); + size = file.length(); + sourceInfo = file.toURI().toString(); + } + + public InputStream getStream() throws IOException { + return new FileInputStream( file ); + } + + /** + * If an charset is defined (by the contentType) ues that, otherwise + * use a file reader + */ + public Reader getReader() throws IOException { + String charset = getCharsetFromContentType( contentType ); + return charset == null + ? new FileReader( file ) + : new InputStreamReader( getStream(), charset ); + } + } + + + /** + * Construct a ContentStream from a File + */ + public static class StringStream extends ContentStreamBase + { + private final String str; + + public StringStream( String str ) { + this.str = str; + + contentType = null; + name = null; + size = new Long( str.length() ); + sourceInfo = "string"; + } + + public InputStream getStream() throws IOException { + return new ByteArrayInputStream( str.getBytes() ); + } + + /** + * If an charset is defined (by the contentType) ues that, otherwise + * use a StringReader + */ + public Reader getReader() throws IOException { + String charset = getCharsetFromContentType( contentType ); + return charset == null + ? new StringReader( str ) + : new InputStreamReader( getStream(), charset ); + } + } + + /** + * Base reader implementation. If the contentType declares a + * charset use it, otherwise use the system default. + */ + public Reader getReader() throws IOException { + String charset = getCharsetFromContentType( contentType ); + return charset == null + ? new InputStreamReader( getStream() ) + : new InputStreamReader( getStream(), charset ); + } + + //------------------------------------------------------------------ + // Getters / Setters for overrideable attributes + //------------------------------------------------------------------ + + public String getContentType() { + return contentType; + } + + public void setContentType(String contentType) { + this.contentType = contentType; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public Long getSize() { + return size; + } + + public void setSize(Long size) { + this.size = size; + } + + public String getSourceInfo() { + return sourceInfo; + } + + public void setSourceInfo(String sourceInfo) { + this.sourceInfo = sourceInfo; + } +} diff --git a/src/test/org/apache/solr/servlet/SolrRequestParserTest.java b/src/test/org/apache/solr/servlet/SolrRequestParserTest.java index bb582226419..e10390632b3 100644 --- a/src/test/org/apache/solr/servlet/SolrRequestParserTest.java +++ b/src/test/org/apache/solr/servlet/SolrRequestParserTest.java @@ -31,7 +31,7 @@ import org.apache.commons.io.IOUtils; import org.apache.solr.core.Config; import org.apache.solr.core.SolrConfig; import org.apache.solr.core.SolrCore; -import org.apache.solr.request.ContentStream; +import org.apache.solr.util.ContentStream; import org.apache.solr.request.MapSolrParams; import org.apache.solr.request.MultiMapSolrParams; import org.apache.solr.request.SolrParams; @@ -84,6 +84,15 @@ public class SolrRequestParserTest extends AbstractSolrTestCase { Collections.sort( input ); Collections.sort( output ); assertEquals( input.toString(), output.toString() ); + + // set the contentType and make sure tat gets set + String ctype = "text/xxx"; + streams = new ArrayList(); + args.put( SolrParams.STREAM_CONTENTTYPE, new String[] {ctype} ); + parser.buildRequestFrom( new MultiMapSolrParams( args ), streams ); + for( ContentStream s : streams ) { + assertEquals( ctype, s.getContentType() ); + } } diff --git a/src/test/org/apache/solr/update/AutoCommitTest.java b/src/test/org/apache/solr/update/AutoCommitTest.java index 6721f096a83..14acd055ac9 100644 --- a/src/test/org/apache/solr/update/AutoCommitTest.java +++ b/src/test/org/apache/solr/update/AutoCommitTest.java @@ -17,20 +17,18 @@ package org.apache.solr.update; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.XmlUpdateRequestHandler; -import org.apache.solr.request.ContentStream; +import org.apache.solr.util.ContentStream; import org.apache.solr.request.MapSolrParams; import org.apache.solr.request.SolrQueryRequestBase; import org.apache.solr.request.SolrQueryResponse; import org.apache.solr.util.AbstractSolrTestCase; +import org.apache.solr.util.ContentStreamBase; /** * @@ -43,25 +41,18 @@ public class AutoCommitTest extends AbstractSolrTestCase { public String getSolrConfigFile() { return "solrconfig.xml"; } /** - * Take a string and make it an iterable ContentStream - * - * This should be moved to a helper class. (it is useful for the client too!) - */ - public static Collection toContentStreams( final String str, final String contentType ) - { - ArrayList streams = new ArrayList(); - streams.add( new ContentStream() { - public String getContentType() { return contentType; } - public Long getSize() { return Long.valueOf( str.length() ); } - public String getName() { return null; } - public String getSourceInfo() { return null; } - - public InputStream getStream() throws IOException { - return new ByteArrayInputStream( str.getBytes() ); - } - }); - return streams; - } + * Take a string and make it an iterable ContentStream + * + * This should be moved to a helper class. (it is useful for the client too!) + */ + public static Collection toContentStreams( final String str, final String contentType ) + { + ArrayList streams = new ArrayList(); + ContentStreamBase stream = new ContentStreamBase.StringStream( str ); + stream.setContentType( contentType ); + streams.add( stream ); + return streams; + } public void testMaxDocs() throws Exception { diff --git a/src/test/org/apache/solr/util/ContentStreamTest.java b/src/test/org/apache/solr/util/ContentStreamTest.java new file mode 100755 index 00000000000..e106daf815a --- /dev/null +++ b/src/test/org/apache/solr/util/ContentStreamTest.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.io.StringReader; +import java.net.URL; + +import org.apache.commons.io.IOUtils; + +import junit.framework.TestCase; + +/** + * @author ryan + */ +public class ContentStreamTest extends TestCase +{ + public void testStringStream() throws IOException + { + String input = "aads ghaskdgasgldj asl sadg ajdsg &jag # @ hjsakg hsakdg hjkas s"; + ContentStreamBase stream = new ContentStreamBase.StringStream( input ); + assertEquals( input.length(), stream.getSize().intValue() ); + assertEquals( input, IOUtils.toString( stream.getStream() ) ); + assertEquals( input, IOUtils.toString( stream.getReader() ) ); + } + + public void testFileStream() throws IOException + { + File file = new File( "README" ); + assertTrue( file.exists() ); // "make sure you are running from: solr\src\test\test-files" + + ContentStreamBase stream = new ContentStreamBase.FileStream( file ); + assertEquals( file.length(), stream.getSize().intValue() ); + assertTrue( IOUtils.contentEquals( new FileInputStream( file ), stream.getStream() ) ); + assertTrue( IOUtils.contentEquals( new FileReader( file ), stream.getReader() ) ); + } + + + public void testURLStream() throws IOException + { + String content = null; + URL url = new URL( "http://svn.apache.org/repos/asf/lucene/solr/trunk/" ); + InputStream in = url.openStream(); + try { + content = IOUtils.toString( in ); + } + finally { + IOUtils.closeQuietly(in); + } + + assertTrue( content.length() > 10 ); // found something... + + ContentStreamBase stream = new ContentStreamBase.URLStream( url ); + assertEquals( content.length(), stream.getSize().intValue() ); + + // Test the stream + in = stream.getStream(); + try { + assertTrue( IOUtils.contentEquals( + new ByteArrayInputStream( content.getBytes() ), in ) ); + } + finally { + IOUtils.closeQuietly(in); + } + + // Re-open the stream and this time use a reader + stream = new ContentStreamBase.URLStream( url ); + assertTrue( IOUtils.contentEquals( new StringReader( content ), stream.getReader() ) ); + } +} diff --git a/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java b/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java index 7bdc825144b..ee3b79f69a7 100644 --- a/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java +++ b/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java @@ -17,11 +17,11 @@ package org.apache.solr.servlet; -import java.io.ByteArrayInputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.Reader; import java.net.URL; -import java.net.URLConnection; import java.net.URLDecoder; import java.util.ArrayList; import java.util.HashMap; @@ -39,12 +39,13 @@ import org.apache.commons.fileupload.servlet.ServletFileUpload; import org.apache.solr.core.Config; import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrException; -import org.apache.solr.request.ContentStream; +import org.apache.solr.util.ContentStream; import org.apache.solr.request.MultiMapSolrParams; import org.apache.solr.request.ServletSolrParams; import org.apache.solr.request.SolrParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.util.ContentStreamBase; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; @@ -117,6 +118,9 @@ public class SolrRequestParsers SolrQueryRequest buildRequestFrom( SolrParams params, List streams ) throws Exception { + // The content type will be applied to all streaming content + String contentType = params.get( SolrParams.STREAM_CONTENTTYPE ); + // Handle anything with a remoteURL String[] strs = params.getParams( SolrParams.STREAM_URL ); if( strs != null ) { @@ -124,18 +128,26 @@ public class SolrRequestParsers throw new SolrException( 400, "Remote Streaming is disabled." ); } for( final String url : strs ) { - final URLConnection conn = new URL(url).openConnection(); - streams.add( new ContentStream() { - public String getContentType() { return conn.getContentType(); } - public String getName() { return url; } - public Long getSize() { return new Long( conn.getContentLength() ); } - public String getSourceInfo() { - return SolrParams.STREAM_URL; - } - public InputStream getStream() throws IOException { - return conn.getInputStream(); - } - }); + ContentStreamBase stream = new ContentStreamBase.URLStream( new URL(url) ); + if( contentType != null ) { + stream.setContentType( contentType ); + } + streams.add( stream ); + } + } + + // Handle streaming files + strs = params.getParams( SolrParams.STREAM_FILE ); + if( strs != null ) { + if( !enableRemoteStreams ) { + throw new SolrException( 400, "Remote Streaming is disabled." ); + } + for( final String file : strs ) { + ContentStreamBase stream = new ContentStreamBase.FileStream( new File(file) ); + if( contentType != null ) { + stream.setContentType( contentType ); + } + streams.add( stream ); } } @@ -143,17 +155,11 @@ public class SolrRequestParsers strs = params.getParams( SolrParams.STREAM_BODY ); if( strs != null ) { for( final String body : strs ) { - streams.add( new ContentStream() { - public String getContentType() { return null; } // Is there anything meaningful? - public String getName() { return null; } - public Long getSize() { return null; } - public String getSourceInfo() { - return SolrParams.STREAM_BODY; - } - public InputStream getStream() throws IOException { - return new ByteArrayInputStream( body.getBytes() ); - } - }); + ContentStreamBase stream = new ContentStreamBase.StringStream( body ); + if( contentType != null ) { + stream.setContentType( contentType ); + } + streams.add( stream ); } } @@ -161,7 +167,6 @@ public class SolrRequestParsers if( streams != null && streams.size() > 0 ) { q.setContentStreams( streams ); } - return q; } @@ -169,7 +174,7 @@ public class SolrRequestParsers /** * Given a standard query string map it into solr params */ - public static MultiMapSolrParams parseQueryString(String queryString) + public static MultiMapSolrParams parseQueryString(String queryString) { Map map = new HashMap(); if( queryString != null && queryString.length() > 0 ) { @@ -245,6 +250,9 @@ class RawRequestParser implements SolrRequestParser public InputStream getStream() throws IOException { return req.getInputStream(); } + public Reader getReader() throws IOException { + return req.getReader(); + } }); return SolrRequestParsers.parseQueryString( req.getQueryString() ); } @@ -307,7 +315,7 @@ class MultipartRequestParser implements SolrRequestParser /** * Wrap a FileItem as a ContentStream */ - private static class FileItemContentStream implements ContentStream + private static class FileItemContentStream extends ContentStreamBase { FileItem item; @@ -379,3 +387,4 @@ class StandardRequestParser implements SolrRequestParser +