stream.contentType, stream.file: SOLR-197

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@523774 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2007-03-29 17:28:31 +00:00
parent 8e21217939
commit 18dc9d98e4
13 changed files with 445 additions and 90 deletions

View File

@ -131,6 +131,10 @@ New Features
18. SOLR-81: More SpellCheckerRequestHandler enhancements, inlcluding
support for relative or absolute directory path configurations, as
well as RAM based directory. (hossman)
19. SOLR-197: New parameters for input: stream.contentType for specifying
or overriding the content type of input, and stream.file for reading
local files. (Ryan McKinley via yonik)
Changes in runtime behavior
1. Highlighting using DisMax will only pick up terms from the main

View File

@ -18,11 +18,10 @@
package org.apache.solr.handler;
import java.io.IOException;
import java.util.Map;
import java.util.ArrayList;
import org.apache.commons.io.IOUtils;
import org.apache.solr.request.ContentStream;
import org.apache.solr.util.ContentStream;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.util.NamedList;

View File

@ -25,9 +25,10 @@ import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import org.apache.commons.io.IOUtils;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrException;
import org.apache.solr.request.ContentStream;
import org.apache.solr.util.ContentStream;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
@ -65,19 +66,7 @@ public class XmlUpdateRequestHandler extends RequestHandlerBase
throw new RuntimeException(e);
}
}
// TODO - this should be a general utility in another class
public static String getCharsetFromContentType( String contentType )
{
if( contentType != null ) {
int idx = contentType.toLowerCase().indexOf( "charset=" );
if( idx > 0 ) {
return contentType.substring( idx + "charset=".length() ).trim();
}
}
return null;
}
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
@ -87,21 +76,15 @@ public class XmlUpdateRequestHandler extends RequestHandlerBase
throw new SolrException( 400, "missing content stream" );
}
// Cycle through each stream
for( ContentStream stream : req.getContentStreams() ) {
String charset = getCharsetFromContentType( stream.getContentType() );
Reader reader = null;
if( charset == null ) {
reader = new InputStreamReader( stream.getStream() );
Reader reader = stream.getReader();
try {
rsp.add( "update", this.update( reader ) );
}
else {
reader = new InputStreamReader( stream.getStream(), charset );
finally {
IOUtils.closeQuietly(reader);
}
rsp.add( "update", this.update( reader ) );
// Make sure its closed
try { reader.close(); } catch( Exception ex ){}
}
}

View File

@ -17,13 +17,11 @@
package org.apache.solr.request;
import java.io.IOException;
import java.io.InputStream;
public interface ContentStream {
String getName();
String getSourceInfo();
String getContentType();
Long getSize(); // size if we know it, otherwise null
InputStream getStream() throws IOException;
@Deprecated
public interface ContentStream extends org.apache.solr.util.ContentStream {
// The contentstream should go in util because it is needed for
// SOLR-20 and does not need any lucene specific libraries.
// it is new since solr 1.1 so I think we can move it without the
// deprication...
}

View File

@ -115,13 +115,22 @@ public abstract class SolrParams {
*/
public static final String FACET_PREFIX = "facet.prefix";
/** If the content stream should come from a URL */
/** If the content stream should come from a URL (using URLConnection) */
public static final String STREAM_URL = "stream.url";
/** If the content stream should come from a File (using FileReader) */
public static final String STREAM_FILE = "stream.file";
/** If the content stream should come directly from a field */
public static final String STREAM_BODY = "stream.body";
/**
* Explicity set the content type for the input stream
* If multiple streams are specified, the explicit contentType
* will be used for all of them.
*/
public static final String STREAM_CONTENTTYPE = "stream.contentType";
/** returns the String value of a param, or null if not set */
public abstract String get(String param);

View File

@ -19,6 +19,7 @@ package org.apache.solr.request;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.util.ContentStream;
import org.apache.solr.core.SolrCore;
import java.util.Map;
@ -123,3 +124,4 @@ public interface SolrQueryRequest {

View File

@ -18,6 +18,7 @@
package org.apache.solr.request;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.ContentStream;
import org.apache.solr.util.RefCounted;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.core.SolrCore;

View File

@ -0,0 +1,76 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import org.apache.commons.io.IOUtils;
/**
* @author ryan
* @version $Id$
* @since solr 1.2
*/
public interface ContentStream {
String getName();
String getSourceInfo();
String getContentType();
/**
* @return the stream size or <code>null</code> if not known
*/
Long getSize(); // size if we know it, otherwise null
/**
* Get an open stream. You are responsible for closing it. Consider using
* something like:
* <pre>
* InputStream stream = stream.getStream();
* try {
* // use the stream...
* }
* finally {
* IOUtils.closeQuietly(reader);
* }
* </pre>
*
* Only the first call to <code>getStream()</code> or <code>getReader()</code>
* is gaurenteed to work. The runtime behavior for aditional calls is undefined.
*/
InputStream getStream() throws IOException;
/**
* Get an open stream. You are responsible for closing it. Consider using
* something like:
* <pre>
* Reader reader = stream.getReader();
* try {
* // use the reader...
* }
* finally {
* IOUtils.closeQuietly(reader);
* }
* </pre>
*
* Only the first call to <code>getStream()</code> or <code>getReader()</code>
* is gaurenteed to work. The runtime behavior for aditional calls is undefined.
*/
Reader getReader() throws IOException;
}

View File

@ -0,0 +1,183 @@
package org.apache.solr.util;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
/**
* Three concrete implementations for ContentStream - one for File/URL/String
*
* @author ryan
* @version $Id$
* @since solr 1.2
*/
public abstract class ContentStreamBase implements ContentStream
{
protected String name;
protected String sourceInfo;
protected String contentType;
protected Long size;
//---------------------------------------------------------------------
//---------------------------------------------------------------------
public static String getCharsetFromContentType( String contentType )
{
if( contentType != null ) {
int idx = contentType.toLowerCase().indexOf( "charset=" );
if( idx > 0 ) {
return contentType.substring( idx + "charset=".length() ).trim();
}
}
return null;
}
//------------------------------------------------------------------------
//------------------------------------------------------------------------
/**
* Construct a <code>ContentStream</code> from a <code>URL</code>
*
* This uses a {@Link URLConnection} to get the content stream
*/
public static class URLStream extends ContentStreamBase
{
private final URL url;
final URLConnection conn;
public URLStream( URL url ) throws IOException {
this.url = url;
this.conn = this.url.openConnection();
contentType = conn.getContentType();
name = url.toExternalForm();
size = new Long( conn.getContentLength() );
sourceInfo = "url";
}
public InputStream getStream() throws IOException {
return conn.getInputStream();
}
}
/**
* Construct a <code>ContentStream</code> from a <code>File</code>
*/
public static class FileStream extends ContentStreamBase
{
private final File file;
public FileStream( File f ) throws IOException {
file = f;
contentType = null; // ??
name = file.getName();
size = file.length();
sourceInfo = file.toURI().toString();
}
public InputStream getStream() throws IOException {
return new FileInputStream( file );
}
/**
* If an charset is defined (by the contentType) ues that, otherwise
* use a file reader
*/
public Reader getReader() throws IOException {
String charset = getCharsetFromContentType( contentType );
return charset == null
? new FileReader( file )
: new InputStreamReader( getStream(), charset );
}
}
/**
* Construct a <code>ContentStream</code> from a <code>File</code>
*/
public static class StringStream extends ContentStreamBase
{
private final String str;
public StringStream( String str ) {
this.str = str;
contentType = null;
name = null;
size = new Long( str.length() );
sourceInfo = "string";
}
public InputStream getStream() throws IOException {
return new ByteArrayInputStream( str.getBytes() );
}
/**
* If an charset is defined (by the contentType) ues that, otherwise
* use a StringReader
*/
public Reader getReader() throws IOException {
String charset = getCharsetFromContentType( contentType );
return charset == null
? new StringReader( str )
: new InputStreamReader( getStream(), charset );
}
}
/**
* Base reader implementation. If the contentType declares a
* charset use it, otherwise use the system default.
*/
public Reader getReader() throws IOException {
String charset = getCharsetFromContentType( contentType );
return charset == null
? new InputStreamReader( getStream() )
: new InputStreamReader( getStream(), charset );
}
//------------------------------------------------------------------
// Getters / Setters for overrideable attributes
//------------------------------------------------------------------
public String getContentType() {
return contentType;
}
public void setContentType(String contentType) {
this.contentType = contentType;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public Long getSize() {
return size;
}
public void setSize(Long size) {
this.size = size;
}
public String getSourceInfo() {
return sourceInfo;
}
public void setSourceInfo(String sourceInfo) {
this.sourceInfo = sourceInfo;
}
}

View File

@ -31,7 +31,7 @@ import org.apache.commons.io.IOUtils;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.ContentStream;
import org.apache.solr.util.ContentStream;
import org.apache.solr.request.MapSolrParams;
import org.apache.solr.request.MultiMapSolrParams;
import org.apache.solr.request.SolrParams;
@ -84,6 +84,15 @@ public class SolrRequestParserTest extends AbstractSolrTestCase {
Collections.sort( input );
Collections.sort( output );
assertEquals( input.toString(), output.toString() );
// set the contentType and make sure tat gets set
String ctype = "text/xxx";
streams = new ArrayList<ContentStream>();
args.put( SolrParams.STREAM_CONTENTTYPE, new String[] {ctype} );
parser.buildRequestFrom( new MultiMapSolrParams( args ), streams );
for( ContentStream s : streams ) {
assertEquals( ctype, s.getContentType() );
}
}

View File

@ -17,20 +17,18 @@
package org.apache.solr.update;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.XmlUpdateRequestHandler;
import org.apache.solr.request.ContentStream;
import org.apache.solr.util.ContentStream;
import org.apache.solr.request.MapSolrParams;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.solr.util.ContentStreamBase;
/**
*
@ -43,25 +41,18 @@ public class AutoCommitTest extends AbstractSolrTestCase {
public String getSolrConfigFile() { return "solrconfig.xml"; }
/**
* Take a string and make it an iterable ContentStream
*
* This should be moved to a helper class. (it is useful for the client too!)
*/
public static Collection<ContentStream> toContentStreams( final String str, final String contentType )
{
ArrayList<ContentStream> streams = new ArrayList<ContentStream>();
streams.add( new ContentStream() {
public String getContentType() { return contentType; }
public Long getSize() { return Long.valueOf( str.length() ); }
public String getName() { return null; }
public String getSourceInfo() { return null; }
public InputStream getStream() throws IOException {
return new ByteArrayInputStream( str.getBytes() );
}
});
return streams;
}
* Take a string and make it an iterable ContentStream
*
* This should be moved to a helper class. (it is useful for the client too!)
*/
public static Collection<ContentStream> toContentStreams( final String str, final String contentType )
{
ArrayList<ContentStream> streams = new ArrayList<ContentStream>();
ContentStreamBase stream = new ContentStreamBase.StringStream( str );
stream.setContentType( contentType );
streams.add( stream );
return streams;
}
public void testMaxDocs() throws Exception {

View File

@ -0,0 +1,91 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import org.apache.commons.io.IOUtils;
import junit.framework.TestCase;
/**
* @author ryan
*/
public class ContentStreamTest extends TestCase
{
public void testStringStream() throws IOException
{
String input = "aads ghaskdgasgldj asl sadg ajdsg &jag # @ hjsakg hsakdg hjkas s";
ContentStreamBase stream = new ContentStreamBase.StringStream( input );
assertEquals( input.length(), stream.getSize().intValue() );
assertEquals( input, IOUtils.toString( stream.getStream() ) );
assertEquals( input, IOUtils.toString( stream.getReader() ) );
}
public void testFileStream() throws IOException
{
File file = new File( "README" );
assertTrue( file.exists() ); // "make sure you are running from: solr\src\test\test-files"
ContentStreamBase stream = new ContentStreamBase.FileStream( file );
assertEquals( file.length(), stream.getSize().intValue() );
assertTrue( IOUtils.contentEquals( new FileInputStream( file ), stream.getStream() ) );
assertTrue( IOUtils.contentEquals( new FileReader( file ), stream.getReader() ) );
}
public void testURLStream() throws IOException
{
String content = null;
URL url = new URL( "http://svn.apache.org/repos/asf/lucene/solr/trunk/" );
InputStream in = url.openStream();
try {
content = IOUtils.toString( in );
}
finally {
IOUtils.closeQuietly(in);
}
assertTrue( content.length() > 10 ); // found something...
ContentStreamBase stream = new ContentStreamBase.URLStream( url );
assertEquals( content.length(), stream.getSize().intValue() );
// Test the stream
in = stream.getStream();
try {
assertTrue( IOUtils.contentEquals(
new ByteArrayInputStream( content.getBytes() ), in ) );
}
finally {
IOUtils.closeQuietly(in);
}
// Re-open the stream and this time use a reader
stream = new ContentStreamBase.URLStream( url );
assertTrue( IOUtils.contentEquals( new StringReader( content ), stream.getReader() ) );
}
}

View File

@ -17,11 +17,11 @@
package org.apache.solr.servlet;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashMap;
@ -39,12 +39,13 @@ import org.apache.commons.fileupload.servlet.ServletFileUpload;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrException;
import org.apache.solr.request.ContentStream;
import org.apache.solr.util.ContentStream;
import org.apache.solr.request.MultiMapSolrParams;
import org.apache.solr.request.ServletSolrParams;
import org.apache.solr.request.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.util.ContentStreamBase;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
@ -117,6 +118,9 @@ public class SolrRequestParsers
SolrQueryRequest buildRequestFrom( SolrParams params, List<ContentStream> streams ) throws Exception
{
// The content type will be applied to all streaming content
String contentType = params.get( SolrParams.STREAM_CONTENTTYPE );
// Handle anything with a remoteURL
String[] strs = params.getParams( SolrParams.STREAM_URL );
if( strs != null ) {
@ -124,18 +128,26 @@ public class SolrRequestParsers
throw new SolrException( 400, "Remote Streaming is disabled." );
}
for( final String url : strs ) {
final URLConnection conn = new URL(url).openConnection();
streams.add( new ContentStream() {
public String getContentType() { return conn.getContentType(); }
public String getName() { return url; }
public Long getSize() { return new Long( conn.getContentLength() ); }
public String getSourceInfo() {
return SolrParams.STREAM_URL;
}
public InputStream getStream() throws IOException {
return conn.getInputStream();
}
});
ContentStreamBase stream = new ContentStreamBase.URLStream( new URL(url) );
if( contentType != null ) {
stream.setContentType( contentType );
}
streams.add( stream );
}
}
// Handle streaming files
strs = params.getParams( SolrParams.STREAM_FILE );
if( strs != null ) {
if( !enableRemoteStreams ) {
throw new SolrException( 400, "Remote Streaming is disabled." );
}
for( final String file : strs ) {
ContentStreamBase stream = new ContentStreamBase.FileStream( new File(file) );
if( contentType != null ) {
stream.setContentType( contentType );
}
streams.add( stream );
}
}
@ -143,17 +155,11 @@ public class SolrRequestParsers
strs = params.getParams( SolrParams.STREAM_BODY );
if( strs != null ) {
for( final String body : strs ) {
streams.add( new ContentStream() {
public String getContentType() { return null; } // Is there anything meaningful?
public String getName() { return null; }
public Long getSize() { return null; }
public String getSourceInfo() {
return SolrParams.STREAM_BODY;
}
public InputStream getStream() throws IOException {
return new ByteArrayInputStream( body.getBytes() );
}
});
ContentStreamBase stream = new ContentStreamBase.StringStream( body );
if( contentType != null ) {
stream.setContentType( contentType );
}
streams.add( stream );
}
}
@ -161,7 +167,6 @@ public class SolrRequestParsers
if( streams != null && streams.size() > 0 ) {
q.setContentStreams( streams );
}
return q;
}
@ -169,7 +174,7 @@ public class SolrRequestParsers
/**
* Given a standard query string map it into solr params
*/
public static MultiMapSolrParams parseQueryString(String queryString)
public static MultiMapSolrParams parseQueryString(String queryString)
{
Map<String,String[]> map = new HashMap<String, String[]>();
if( queryString != null && queryString.length() > 0 ) {
@ -245,6 +250,9 @@ class RawRequestParser implements SolrRequestParser
public InputStream getStream() throws IOException {
return req.getInputStream();
}
public Reader getReader() throws IOException {
return req.getReader();
}
});
return SolrRequestParsers.parseQueryString( req.getQueryString() );
}
@ -307,7 +315,7 @@ class MultipartRequestParser implements SolrRequestParser
/**
* Wrap a FileItem as a ContentStream
*/
private static class FileItemContentStream implements ContentStream
private static class FileItemContentStream extends ContentStreamBase
{
FileItem item;
@ -379,3 +387,4 @@ class StandardRequestParser implements SolrRequestParser