mirror of https://github.com/apache/lucene.git
SOLR-7938: MergeStream now supports merging more than 2 streams together
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1713190 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3f21788fbd
commit
8309dc9b32
|
@ -83,6 +83,7 @@ New Features
|
||||||
* SOLR-6273: Cross Data Center Replication. Active/passive replication for separate
|
* SOLR-6273: Cross Data Center Replication. Active/passive replication for separate
|
||||||
SolrClouds hosted on separate data centers. (Renaud Delbru, Yonik Seeley via Erick Erickson)
|
SolrClouds hosted on separate data centers. (Renaud Delbru, Yonik Seeley via Erick Erickson)
|
||||||
|
|
||||||
|
* SOLR-7938: MergeStream now supports merging more than 2 streams together (Dennis Gove)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
|
@ -32,21 +32,22 @@ import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionValue;
|
||||||
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
|
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unions streamA with streamB ordering the Tuples based on a Comparator.
|
* Merges two or more streams together ordering the Tuples based on a Comparator.
|
||||||
* Both streams must be sorted by the fields being compared.
|
* All streams must be sorted by the fields being compared - this will be validated on construction.
|
||||||
**/
|
**/
|
||||||
|
|
||||||
|
|
||||||
public class MergeStream extends TupleStream implements Expressible {
|
public class MergeStream extends TupleStream implements Expressible {
|
||||||
|
|
||||||
private static final long serialVersionUID = 1;
|
private static final long serialVersionUID = 1;
|
||||||
|
|
||||||
private PushBackStream streamA;
|
private PushBackStream[] streams;
|
||||||
private PushBackStream streamB;
|
|
||||||
private StreamComparator comp;
|
private StreamComparator comp;
|
||||||
|
|
||||||
public MergeStream(TupleStream streamA, TupleStream streamB, StreamComparator comp) throws IOException {
|
public MergeStream(TupleStream streamA, TupleStream streamB, StreamComparator comp) throws IOException {
|
||||||
init(streamA, streamB, comp);
|
init(comp, streamA, streamB);
|
||||||
|
}
|
||||||
|
|
||||||
|
public MergeStream(StreamComparator comp, TupleStream ... streams) throws IOException {
|
||||||
|
init(comp, streams);
|
||||||
}
|
}
|
||||||
|
|
||||||
public MergeStream(StreamExpression expression,StreamFactory factory) throws IOException {
|
public MergeStream(StreamExpression expression,StreamFactory factory) throws IOException {
|
||||||
|
@ -59,29 +60,39 @@ public class MergeStream extends TupleStream implements Expressible {
|
||||||
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - unknown operands found", expression));
|
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - unknown operands found", expression));
|
||||||
}
|
}
|
||||||
|
|
||||||
if(2 != streamExpressions.size()){
|
if(streamExpressions.size() < 2){
|
||||||
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting two streams but found %d (must be PushBackStream types)",expression, streamExpressions.size()));
|
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting at least two streams but found %d (must be PushBackStream types)",expression, streamExpressions.size()));
|
||||||
}
|
}
|
||||||
|
|
||||||
if(null == onExpression || !(onExpression.getParameter() instanceof StreamExpressionValue)){
|
if(null == onExpression || !(onExpression.getParameter() instanceof StreamExpressionValue)){
|
||||||
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting single 'on' parameter listing fields to merge on but didn't find one",expression));
|
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting single 'on' parameter listing fields to merge on but didn't find one",expression));
|
||||||
}
|
}
|
||||||
|
|
||||||
init( factory.constructStream(streamExpressions.get(0)),
|
TupleStream[] streams = new TupleStream[streamExpressions.size()];
|
||||||
factory.constructStream(streamExpressions.get(1)),
|
for(int idx = 0; idx < streamExpressions.size(); ++idx){
|
||||||
factory.constructComparator(((StreamExpressionValue)onExpression.getParameter()).getValue(), FieldComparator.class)
|
streams[idx] = factory.constructStream(streamExpressions.get(idx));
|
||||||
|
}
|
||||||
|
|
||||||
|
init( factory.constructComparator(((StreamExpressionValue)onExpression.getParameter()).getValue(), FieldComparator.class),
|
||||||
|
streams
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void init(TupleStream streamA, TupleStream streamB, StreamComparator comp) throws IOException {
|
private void init(StreamComparator comp, TupleStream ... streams) throws IOException {
|
||||||
this.streamA = new PushBackStream(streamA);
|
|
||||||
this.streamB = new PushBackStream(streamB);
|
// All streams must be sorted such that comp can be derived from each stream's sort
|
||||||
this.comp = comp;
|
for(TupleStream stream : streams){
|
||||||
|
if(!comp.isDerivedFrom(stream.getStreamSort())){
|
||||||
// streamA and streamB must both be sorted so that comp can be derived from
|
throw new IOException("Invalid MergeStream - all substream comparators (sort) must be a superset of this stream's comparator.");
|
||||||
if(!comp.isDerivedFrom(streamA.getStreamSort()) || !comp.isDerivedFrom(streamB.getStreamSort())){
|
}
|
||||||
throw new IOException("Invalid MergeStream - both substream comparators (sort) must be a superset of this stream's comparator.");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert to PushBack streams so we can push back tuples
|
||||||
|
this.streams = new PushBackStream[streams.length];
|
||||||
|
for(int idx = 0; idx < streams.length; ++idx){
|
||||||
|
this.streams[idx] = new PushBackStream(streams[idx]);
|
||||||
|
}
|
||||||
|
this.comp = comp;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -90,8 +101,9 @@ public class MergeStream extends TupleStream implements Expressible {
|
||||||
StreamExpression expression = new StreamExpression(factory.getFunctionName(this.getClass()));
|
StreamExpression expression = new StreamExpression(factory.getFunctionName(this.getClass()));
|
||||||
|
|
||||||
// streams
|
// streams
|
||||||
expression.addParameter(streamA.toExpression(factory));
|
for(PushBackStream stream : streams){
|
||||||
expression.addParameter(streamB.toExpression(factory));
|
expression.addParameter(stream.toExpression(factory));
|
||||||
|
}
|
||||||
|
|
||||||
// on
|
// on
|
||||||
expression.addParameter(new StreamExpressionNamedParameter("on",comp.toExpression(factory)));
|
expression.addParameter(new StreamExpressionNamedParameter("on",comp.toExpression(factory)));
|
||||||
|
@ -100,54 +112,101 @@ public class MergeStream extends TupleStream implements Expressible {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setStreamContext(StreamContext context) {
|
public void setStreamContext(StreamContext context) {
|
||||||
this.streamA.setStreamContext(context);
|
for(PushBackStream stream : streams){
|
||||||
this.streamB.setStreamContext(context);
|
stream.setStreamContext(context);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<TupleStream> children() {
|
public List<TupleStream> children() {
|
||||||
List<TupleStream> l = new ArrayList();
|
List<TupleStream> l = new ArrayList();
|
||||||
l.add(streamA);
|
for(PushBackStream stream : streams){
|
||||||
l.add(streamB);
|
l.add(stream);
|
||||||
|
}
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void open() throws IOException {
|
public void open() throws IOException {
|
||||||
streamA.open();
|
for(PushBackStream stream : streams){
|
||||||
streamB.open();
|
stream.open();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
streamA.close();
|
for(PushBackStream stream : streams){
|
||||||
streamB.close();
|
stream.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Tuple read() throws IOException {
|
public Tuple read() throws IOException {
|
||||||
Tuple a = streamA.read();
|
|
||||||
Tuple b = streamB.read();
|
// might be able to optimize this by sorting the streams based on the next to read tuple from each.
|
||||||
|
// if we can ensure the sort of the streams and update it in less than linear time then there would
|
||||||
if(a.EOF && b.EOF) {
|
// be some performance gain. But, assuming the number of streams is fairly small, this might not be
|
||||||
return a;
|
// worth it
|
||||||
|
|
||||||
|
Tuple minimum = null;
|
||||||
|
PushBackStream minimumStream = null;
|
||||||
|
for(PushBackStream stream : streams){
|
||||||
|
Tuple current = stream.read();
|
||||||
|
|
||||||
|
if(current.EOF){
|
||||||
|
stream.pushBack(current);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(null == minimum){
|
||||||
|
minimum = current;
|
||||||
|
minimumStream = stream;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(comp.compare(current, minimum) < 0){
|
||||||
|
// Push back on its stream
|
||||||
|
minimumStream.pushBack(minimum);
|
||||||
|
|
||||||
|
minimum = current;
|
||||||
|
minimumStream = stream;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
stream.pushBack(current);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(a.EOF) {
|
// If all streams are at EOF then minimum will be null, else minimum is the smallest tuple read
|
||||||
streamA.pushBack(a);
|
if(null == minimum){
|
||||||
return b;
|
// return EOF; it doesn't matter which stream it comes from because we're done
|
||||||
}
|
return streams[0].read();
|
||||||
|
|
||||||
if(b.EOF) {
|
|
||||||
streamB.pushBack(b);
|
|
||||||
return a;
|
|
||||||
}
|
|
||||||
|
|
||||||
int c = comp.compare(a,b);
|
|
||||||
|
|
||||||
if(c < 0) {
|
|
||||||
streamB.pushBack(b);
|
|
||||||
return a;
|
|
||||||
} else {
|
|
||||||
streamA.pushBack(a);
|
|
||||||
return b;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return minimum;
|
||||||
|
|
||||||
|
// Tuple a = streamA.read();
|
||||||
|
// Tuple b = streamB.read();
|
||||||
|
//
|
||||||
|
// if(a.EOF && b.EOF) {
|
||||||
|
// return a;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if(a.EOF) {
|
||||||
|
// streamA.pushBack(a);
|
||||||
|
// return b;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if(b.EOF) {
|
||||||
|
// streamB.pushBack(b);
|
||||||
|
// return a;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// int c = comp.compare(a,b);
|
||||||
|
//
|
||||||
|
// if(c < 0) {
|
||||||
|
// streamB.pushBack(b);
|
||||||
|
// return a;
|
||||||
|
// } else {
|
||||||
|
// streamA.pushBack(a);
|
||||||
|
// return b;
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return the stream sort - ie, the order in which records are returned */
|
/** Return the stream sort - ie, the order in which records are returned */
|
||||||
|
|
|
@ -341,6 +341,17 @@ public class StreamExpressionTest extends AbstractFullDistribZkTestBase {
|
||||||
assert(tuples.size() == 5);
|
assert(tuples.size() == 5);
|
||||||
assertOrder(tuples, 0,2,1,3,4);
|
assertOrder(tuples, 0,2,1,3,4);
|
||||||
|
|
||||||
|
// full factory w/multi streams
|
||||||
|
stream = factory.constructStream("merge("
|
||||||
|
+ "search(collection1, q=\"id:(0 4)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_s asc\"),"
|
||||||
|
+ "search(collection1, q=\"id:(1)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_s asc\"),"
|
||||||
|
+ "search(collection1, q=\"id:(2)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_s asc\"),"
|
||||||
|
+ "on=\"a_f asc\")");
|
||||||
|
tuples = getTuples(stream);
|
||||||
|
|
||||||
|
assert(tuples.size() == 4);
|
||||||
|
assertOrder(tuples, 0,2,1,4);
|
||||||
|
|
||||||
del("*:*");
|
del("*:*");
|
||||||
commit();
|
commit();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue