SOLR-7904: Add StreamExpression Support to FacetStream

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1719838 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dennis Gove 2015-12-13 21:21:52 +00:00
parent 7da175b0b6
commit 1fc12ffd41
5 changed files with 847 additions and 15 deletions

View File

@ -116,6 +116,8 @@ New Features
* SOLR-8337: Add ReduceOperation and wire it into the ReducerStream (Joel Bernstein)
* SOLR-7904: Add StreamExpression Support to FacetStream (Dennis Gove)
Bug Fixes
----------------------
* SOLR-8386: Add field option in the new admin UI schema page loads up even when no schemaFactory has been

View File

@ -33,6 +33,7 @@ import org.apache.solr.client.solrj.io.comp.StreamComparator;
import org.apache.solr.client.solrj.io.ops.GroupOperation;
import org.apache.solr.client.solrj.io.stream.CloudSolrStream;
import org.apache.solr.client.solrj.io.stream.ExceptionStream;
import org.apache.solr.client.solrj.io.stream.FacetStream;
import org.apache.solr.client.solrj.io.stream.InnerJoinStream;
import org.apache.solr.client.solrj.io.stream.LeftOuterJoinStream;
import org.apache.solr.client.solrj.io.stream.HashJoinStream;
@ -111,6 +112,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware {
.withFunctionName("leftOuterJoin", LeftOuterJoinStream.class)
.withFunctionName("hashJoin", HashJoinStream.class)
.withFunctionName("outerHashJoin", OuterHashJoinStream.class)
.withFunctionName("facet", FacetStream.class)
// metrics
.withFunctionName("min", MinMetric.class)

View File

@ -20,15 +20,26 @@ package org.apache.solr.client.solrj.io.stream;
import java.io.IOException;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Collections;
import java.util.Map.Entry;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.io.SolrClientCache;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.comp.ComparatorOrder;
import org.apache.solr.client.solrj.io.comp.FieldComparator;
import org.apache.solr.client.solrj.io.comp.MultipleFieldComparator;
import org.apache.solr.client.solrj.io.comp.StreamComparator;
import org.apache.solr.client.solrj.io.stream.expr.Expressible;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionValue;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import org.apache.solr.client.solrj.io.stream.metrics.Bucket;
import org.apache.solr.client.solrj.io.stream.metrics.Metric;
import org.apache.solr.client.solrj.request.QueryRequest;
@ -40,14 +51,14 @@ import org.apache.solr.common.util.NamedList;
* RollupStream which uses Map/Reduce to perform aggregations.
**/
public class FacetStream extends TupleStream {
public class FacetStream extends TupleStream implements Expressible {
private static final long serialVersionUID = 1;
private Bucket[] buckets;
private Metric[] metrics;
private int limit;
private FieldComparator[] sorts;
private int bucketSizeLimit;
private FieldComparator[] bucketSorts;
private List<Tuple> tuples = new ArrayList();
private int index;
private String zkHost;
@ -61,25 +72,199 @@ public class FacetStream extends TupleStream {
Map<String, String> props,
Bucket[] buckets,
Metric[] metrics,
FieldComparator[] sorts,
int limit) throws IOException {
FieldComparator[] bucketSorts,
int bucketSizeLimit) throws IOException {
init(collection, props, buckets, bucketSorts, metrics, bucketSizeLimit, zkHost);
}
/**
 * Builds a FacetStream from a parsed streaming expression, e.g.
 * <pre>
 * facet(collection1, q="*:*", buckets="a_s", bucketSorts="sum(a_i) asc", bucketSizeLimit=100, sum(a_i), count(*))
 * </pre>
 * The first operand is the collection name. The named operands "buckets", "bucketSorts",
 * "bucketSizeLimit" and (optionally) "zkHost" configure the stream itself; every other named
 * operand is kept as a Solr request parameter. Metric operands (sum, min, ...) are built via
 * {@link StreamFactory#constructMetric}.
 *
 * @param expression the parsed facet(...) expression
 * @param factory    factory used to read operands, construct metrics and look up the zkHost
 * @throws IOException if the expression has unknown operands or is missing the collection name,
 *                     a Solr parameter, buckets, bucketSorts, a metric, a positive integer
 *                     bucketSizeLimit, or a resolvable zkHost
 */
public FacetStream(StreamExpression expression, StreamFactory factory) throws IOException{
  // grab all parameters out
  String collectionName = factory.getValueOperand(expression, 0);
  List<StreamExpressionNamedParameter> namedParams = factory.getNamedOperands(expression);
  StreamExpressionNamedParameter bucketExpression = factory.getNamedOperand(expression, "buckets");
  StreamExpressionNamedParameter bucketSortExpression = factory.getNamedOperand(expression, "bucketSorts");
  List<StreamExpression> metricExpressions = factory.getExpressionOperandsRepresentingTypes(expression, Expressible.class, Metric.class);
  StreamExpressionNamedParameter limitExpression = factory.getNamedOperand(expression, "bucketSizeLimit");
  StreamExpressionNamedParameter zkHostExpression = factory.getNamedOperand(expression, "zkHost");

  // Validate there are no unknown parameters
  if(expression.getParameters().size() != 1 + namedParams.size() + metricExpressions.size()){
    throw new IOException(String.format(Locale.ROOT,"invalid expression %s - unknown operands found",expression));
  }

  // Collection Name
  if(null == collectionName){
    throw new IOException(String.format(Locale.ROOT,"invalid expression %s - collectionName expected as first operand",expression));
  }

  // Named parameters - passed directly to solr as solrparams
  if(0 == namedParams.size()){
    throw new IOException(String.format(Locale.ROOT,"invalid expression %s - at least one named parameter expected. eg. 'q=*:*'",expression));
  }

  // Everything that is not one of the facet-specific operands is forwarded to Solr untouched
  Map<String,String> params = new HashMap<String,String>();
  for(StreamExpressionNamedParameter namedParam : namedParams){
    if(!isFacetStreamOperand(namedParam.getName())){
      params.put(namedParam.getName(), namedParam.getParameter().toString().trim());
    }
  }

  // buckets, required - comma separated
  Bucket[] buckets = null;
  if(null != bucketExpression){
    if(bucketExpression.getParameter() instanceof StreamExpressionValue){
      String[] keys = ((StreamExpressionValue)bucketExpression.getParameter()).getValue().split(",");
      if(0 != keys.length){
        buckets = new Bucket[keys.length];
        for(int idx = 0; idx < keys.length; ++idx){
          buckets[idx] = new Bucket(keys[idx].trim());
        }
      }
    }
  }

  if(null == buckets){
    throw new IOException(String.format(Locale.ROOT,"invalid expression %s - at least one bucket expected. eg. 'buckets=\"name\"'",expression));
  }

  // bucketSorts, required
  FieldComparator[] bucketSorts = null;
  if(null != bucketSortExpression){
    if(bucketSortExpression.getParameter() instanceof StreamExpressionValue){
      bucketSorts = parseBucketSorts(((StreamExpressionValue)bucketSortExpression.getParameter()).getValue());
    }
  }

  if(null == bucketSorts || 0 == bucketSorts.length){
    throw new IOException(String.format(Locale.ROOT,"invalid expression %s - at least one bucket sort expected. eg. 'bucketSorts=\"name asc\"'",expression));
  }

  // Construct the metrics
  Metric[] metrics = new Metric[metricExpressions.size()];
  for(int idx = 0; idx < metricExpressions.size(); ++idx){
    metrics[idx] = factory.constructMetric(metricExpressions.get(idx));
  }

  if(0 == metrics.length){
    throw new IOException(String.format(Locale.ROOT,"invalid expression %s - at least one metric expected.",expression));
  }

  // bucketSizeLimit, required - positive integer
  if(null == limitExpression || null == limitExpression.getParameter() || !(limitExpression.getParameter() instanceof StreamExpressionValue)){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting a single 'bucketSizeLimit' parameter of type positive integer but didn't find one",expression));
  }

  String limitStr = ((StreamExpressionValue)limitExpression.getParameter()).getValue();
  int limitInt;
  try{
    limitInt = Integer.parseInt(limitStr);
  }
  catch(NumberFormatException e){
    // keep the parse failure as the cause so the offending input can be traced
    throw new IOException(String.format(Locale.ROOT,"invalid expression %s - bucketSizeLimit '%s' is not a valid integer.",expression, limitStr), e);
  }
  if(limitInt <= 0){
    throw new IOException(String.format(Locale.ROOT,"invalid expression %s - bucketSizeLimit '%s' must be greater than 0.",expression, limitStr));
  }

  // zkHost, optional - if not provided then will look into factory list to get
  String zkHost = null;
  if(null == zkHostExpression){
    zkHost = factory.getCollectionZkHost(collectionName);
  }
  else if(zkHostExpression.getParameter() instanceof StreamExpressionValue){
    zkHost = ((StreamExpressionValue)zkHostExpression.getParameter()).getValue();
  }
  if(null == zkHost){
    throw new IOException(String.format(Locale.ROOT,"invalid expression %s - zkHost not found for collection '%s'",expression,collectionName));
  }

  // We've got all the required items
  init(collectionName, params, buckets, bucketSorts, metrics, limitInt, zkHost);
}

/**
 * Tells whether a named operand configures the FacetStream itself and therefore must not be
 * forwarded to Solr as a request parameter. "limit" stays in the list because the constructor
 * has always filtered it out.
 */
private static boolean isFacetStreamOperand(String name){
  return "zkHost".equals(name)
      || "buckets".equals(name)
      || "bucketSorts".equals(name)
      || "bucketSizeLimit".equals(name)
      || "limit".equals(name);
}
/**
 * Turns a comma separated list of "field order" clauses into one FieldComparator per clause.
 * An order of "asc" (any case) sorts ascending; any other order token sorts descending.
 *
 * @param bucketSortString the raw bucketSorts value, e.g. "sum(a_i) asc, a_s desc"
 * @return comparators in the same order as the clauses
 * @throws IOException if a clause does not consist of exactly two whitespace separated tokens
 */
private FieldComparator[] parseBucketSorts(String bucketSortString) throws IOException {
  final String[] clauses = bucketSortString.split(",");
  final FieldComparator[] comparators = new FieldComparator[clauses.length];

  int position = 0;
  for(String clause : clauses){
    // any run of whitespace separates the field from its order
    final String[] tokens = clause.trim().split("\\s+");
    if(tokens.length != 2){
      throw new IOException(String.format(Locale.ROOT,"invalid expression - bad bucketSort '%s'. Expected form 'field order'",bucketSortString));
    }

    final ComparatorOrder direction;
    if("asc".equalsIgnoreCase(tokens[1].trim())){
      direction = ComparatorOrder.ASCENDING;
    }
    else{
      direction = ComparatorOrder.DESCENDING;
    }

    comparators[position++] = new FieldComparator(tokens[0].trim(), direction);
  }

  return comparators;
}
/**
 * Shared initialisation used by the constructors: stores the collection, Solr params, buckets,
 * bucket sorts, metrics, bucket size limit and zkHost on this instance, then rejects any sort
 * whose comparator was built with two different field names.
 *
 * NOTE(review): this span appears to interleave pre-rename and post-rename lines
 * (limit/sorts next to bucketSizeLimit/bucketSorts, and two consecutive loop headers) -
 * confirm which set is the intended one before relying on it.
 *
 * @throws IOException if a sort reports different left and right field names
 */
private void init(String collection, Map<String, String> props, Bucket[] buckets, FieldComparator[] bucketSorts, Metric[] metrics, int bucketSizeLimit, String zkHost) throws IOException {
this.zkHost = zkHost;
this.props = props;
this.buckets = buckets;
this.metrics = metrics;
// NOTE(review): no parameter named 'limit' is declared on this method, so this assigns the field to itself - confirm
this.limit = limit;
this.bucketSizeLimit = bucketSizeLimit;
this.collection = collection;
// NOTE(review): likewise no 'sorts' parameter is declared here - confirm
this.sorts = sorts;
this.bucketSorts = bucketSorts;
// In a facet world it only makes sense to have the same field name in all of the sorters
// Because FieldComparator allows for left and right field names we will need to validate
// that they are the same
for(FieldComparator sort : sorts){
for(FieldComparator sort : bucketSorts){
if(sort.hasDifferentFieldNames()){
throw new IOException("Invalid FacetStream - all sorts must be constructed with a single field name.");
}
}
}
/**
 * Renders this stream back into its streaming-expression form: function name, collection,
 * the stored Solr params, buckets, bucketSorts, each metric, bucketSizeLimit and zkHost,
 * added in that order.
 *
 * @param factory used to resolve the function name and to render sorts and metrics
 * @return the expression describing this stream
 * @throws IOException if rendering a sort or metric fails
 */
@Override
public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException {
  final StreamExpression facetExpression = new StreamExpression(factory.getFunctionName(this.getClass()));

  // collection comes first, followed by the pass-through solr params
  facetExpression.addParameter(collection);
  for(Entry<String,String> solrParam : props.entrySet()){
    final String paramName = solrParam.getKey();
    final String paramValue = solrParam.getValue();
    facetExpression.addParameter(new StreamExpressionNamedParameter(paramName, paramValue));
  }

  // buckets as a comma separated list
  final StringBuilder bucketList = new StringBuilder();
  for(int b = 0; b < buckets.length; ++b){
    if(bucketList.length() > 0){
      bucketList.append(",");
    }
    bucketList.append(buckets[b].toString());
  }
  facetExpression.addParameter(new StreamExpressionNamedParameter("buckets", bucketList.toString()));

  // bucketSorts as a comma separated list
  final StringBuilder sortList = new StringBuilder();
  for(int s = 0; s < bucketSorts.length; ++s){
    if(sortList.length() > 0){
      sortList.append(",");
    }
    sortList.append(bucketSorts[s].toExpression(factory));
  }
  facetExpression.addParameter(new StreamExpressionNamedParameter("bucketSorts", sortList.toString()));

  // one operand per metric
  for(int m = 0; m < metrics.length; ++m){
    facetExpression.addParameter(metrics[m].toExpression(factory));
  }

  // size limit and zkHost close the expression
  facetExpression.addParameter(new StreamExpressionNamedParameter("bucketSizeLimit", Integer.toString(bucketSizeLimit)));
  facetExpression.addParameter(new StreamExpressionNamedParameter("zkHost", zkHost));

  return facetExpression;
}
public void setStreamContext(StreamContext context) {
cache = context.getSolrClientCache();
@ -97,8 +282,8 @@ public class FacetStream extends TupleStream {
cloudSolrClient = new CloudSolrClient(zkHost);
}
FieldComparator[] adjustedSorts = adjustSorts(buckets, sorts);
String json = getJsonFacetString(buckets, metrics, adjustedSorts, limit);
FieldComparator[] adjustedSorts = adjustSorts(buckets, bucketSorts);
String json = getJsonFacetString(buckets, metrics, adjustedSorts, bucketSizeLimit);
ModifiableSolrParams params = getParams(this.props);
params.add("json.facet", json);
@ -121,7 +306,7 @@ public class FacetStream extends TupleStream {
}
public Tuple read() throws IOException {
if(index < tuples.size() && index < limit) {
if(index < tuples.size() && index < bucketSizeLimit) {
Tuple tuple = tuples.get(index);
++index;
return tuple;
@ -286,10 +471,10 @@ public class FacetStream extends TupleStream {
/**
 * Returns the comparator describing the order of the emitted tuples: a MultipleFieldComparator
 * when more than one sort is configured, otherwise the single configured sort.
 *
 * NOTE(review): this span appears to interleave the pre-rename ('sorts') and post-rename
 * ('bucketSorts') versions of each statement - confirm which set is intended.
 */
@Override
public StreamComparator getStreamSort() {
if(sorts.length > 1) {
return new MultipleFieldComparator(sorts);
if(bucketSorts.length > 1) {
return new MultipleFieldComparator(bucketSorts);
} else {
return sorts[0];
return bucketSorts[0];
}
}
}

View File

@ -28,13 +28,17 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.ops.GroupOperation;
import org.apache.solr.client.solrj.io.comp.ComparatorOrder;
import org.apache.solr.client.solrj.io.comp.FieldComparator;
import org.apache.solr.client.solrj.io.ops.ReplaceOperation;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import org.apache.solr.client.solrj.io.stream.metrics.Bucket;
import org.apache.solr.client.solrj.io.stream.metrics.CountMetric;
import org.apache.solr.client.solrj.io.stream.metrics.MaxMetric;
import org.apache.solr.client.solrj.io.stream.metrics.MeanMetric;
import org.apache.solr.client.solrj.io.stream.metrics.Metric;
import org.apache.solr.client.solrj.io.stream.metrics.MinMetric;
import org.apache.solr.client.solrj.io.stream.metrics.SumMetric;
import org.apache.solr.cloud.AbstractFullDistribZkTestBase;
@ -138,6 +142,8 @@ public class StreamExpressionTest extends AbstractFullDistribZkTestBase {
testHashJoinStream();
testOuterHashJoinStream();
testSelectStream();
testFacetStream();
testSubFacetStream();
}
private void testCloudSolrStream() throws Exception {
@ -1390,6 +1396,606 @@ public class StreamExpressionTest extends AbstractFullDistribZkTestBase {
commit();
}
/**
 * Indexes ten documents spread over three a_s values and checks that facet(...) returns the
 * three buckets with the expected sum/min/max/avg/count metrics, ordered according to
 * bucketSorts (metric ascending, metric descending, index descending, index ascending).
 */
private void testFacetStream() throws Exception {

  indexr(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1");
  indexr(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2");
  indexr(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3");
  indexr(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4");
  indexr(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5");
  indexr(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6");
  indexr(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7");
  indexr(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8");
  indexr(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9");
  indexr(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10");

  commit();

  StreamFactory factory = new StreamFactory()
    .withCollectionZkHost("collection1", zkServer.getZkAddress())
    .withFunctionName("facet", FacetStream.class)
    .withFunctionName("sum", SumMetric.class)
    .withFunctionName("min", MinMetric.class)
    .withFunctionName("max", MaxMetric.class)
    .withFunctionName("avg", MeanMetric.class)
    .withFunctionName("count", CountMetric.class);

  // Expected metrics per bucket, in the order:
  // sum(a_i), sum(a_f), min(a_i), min(a_f), max(a_i), max(a_f), avg(a_i), avg(a_f), count(*)
  final double[] hello0 = {17.0D, 18.0D, 0.0D, 1.0D, 14.0D, 10.0D, 4.25D, 4.5D, 4.0D};
  final double[] hello3 = {38.0D, 26.0D, 3.0D, 3.0D, 13.0D, 9.0D, 9.5D, 6.5D, 4.0D};
  final double[] hello4 = {15.0D, 11.0D, 4.0D, 4.0D, 11.0D, 7.0D, 7.5D, 5.5D, 2.0D};

  List<Tuple> tuples;

  // Basic test - metric sort ascending
  tuples = getTuples(factory.constructStream(singleBucketFacetClause("sum(a_i) asc")));
  assertTrue("expected 3 buckets but found " + tuples.size(), tuples.size() == 3);
  assertFacetTuple(tuples.get(0), "hello4", hello4);
  assertFacetTuple(tuples.get(1), "hello0", hello0);
  assertFacetTuple(tuples.get(2), "hello3", hello3);

  // Reverse the sort
  tuples = getTuples(factory.constructStream(singleBucketFacetClause("sum(a_i) desc")));
  assertTrue("expected 3 buckets but found " + tuples.size(), tuples.size() == 3);
  assertFacetTuple(tuples.get(0), "hello3", hello3);
  assertFacetTuple(tuples.get(1), "hello0", hello0);
  assertFacetTuple(tuples.get(2), "hello4", hello4);

  // Index sort descending
  tuples = getTuples(factory.constructStream(singleBucketFacetClause("a_s desc")));
  assertTrue("expected 3 buckets but found " + tuples.size(), tuples.size() == 3);
  assertFacetTuple(tuples.get(0), "hello4", hello4);
  assertFacetTuple(tuples.get(1), "hello3", hello3);
  assertFacetTuple(tuples.get(2), "hello0", hello0);

  // Index sort ascending
  tuples = getTuples(factory.constructStream(singleBucketFacetClause("a_s asc")));
  assertTrue("expected 3 buckets but found " + tuples.size(), tuples.size() == 3);
  assertFacetTuple(tuples.get(0), "hello0", hello0);
  assertFacetTuple(tuples.get(1), "hello3", hello3);
  assertFacetTuple(tuples.get(2), "hello4", hello4);

  del("*:*");
  commit();
}

/** Builds the single-bucket facet(...) clause used by testFacetStream with the given bucketSorts value. */
private String singleBucketFacetClause(String bucketSorts) {
  return "facet("
      +   "collection1, "
      +   "q=\"*:*\", "
      +   "fl=\"a_s,a_i,a_f\", "
      +   "sort=\"a_s asc\", "
      +   "buckets=\"a_s\", "
      +   "bucketSorts=\"" + bucketSorts + "\", "
      +   "bucketSizeLimit=100, "
      +   "sum(a_i), sum(a_f), "
      +   "min(a_i), min(a_f), "
      +   "max(a_i), max(a_f), "
      +   "avg(a_i), avg(a_f), "
      +   "count(*)"
      + ")";
}

/**
 * Asserts that a facet tuple belongs to the expected a_s bucket and carries the expected metrics.
 * Values are given in the order: sum(a_i), sum(a_f), min(a_i), min(a_f), max(a_i), max(a_f),
 * avg(a_i), avg(a_f), count(*).
 */
private void assertFacetTuple(Tuple tuple, String expectedBucket, double[] expectedMetrics) throws Exception {
  final String[] metricNames = {
      "sum(a_i)", "sum(a_f)",
      "min(a_i)", "min(a_f)",
      "max(a_i)", "max(a_f)",
      "avg(a_i)", "avg(a_f)",
      "count(*)"};
  assertTrue("one expected value is required per metric", metricNames.length == expectedMetrics.length);

  String bucket = tuple.getString("a_s");
  assertTrue("expected bucket " + expectedBucket + " but found " + bucket, expectedBucket.equals(bucket));

  for(int idx = 0; idx < metricNames.length; ++idx){
    Double actual = tuple.getDouble(metricNames[idx]);
    assertTrue("bucket " + expectedBucket + ": expected " + metricNames[idx] + "=" + expectedMetrics[idx] + " but found " + actual,
        null != actual && actual.doubleValue() == expectedMetrics[idx]);
  }
}
/**
 * Indexes ten documents over two bucket levels (level1_s, level2_s) and checks that a
 * two-bucket facet(...) returns the six (level1, level2) combinations with the expected
 * sum(a_i) and count(*), first ordered by metric descending and then by index descending.
 */
private void testSubFacetStream() throws Exception {

  indexr(id, "0", "level1_s", "hello0", "level2_s", "a", "a_i", "0", "a_f", "1");
  indexr(id, "2", "level1_s", "hello0", "level2_s", "a", "a_i", "2", "a_f", "2");
  indexr(id, "3", "level1_s", "hello3", "level2_s", "a", "a_i", "3", "a_f", "3");
  indexr(id, "4", "level1_s", "hello4", "level2_s", "a", "a_i", "4", "a_f", "4");
  indexr(id, "1", "level1_s", "hello0", "level2_s", "b", "a_i", "1", "a_f", "5");
  indexr(id, "5", "level1_s", "hello3", "level2_s", "b", "a_i", "10", "a_f", "6");
  indexr(id, "6", "level1_s", "hello4", "level2_s", "b", "a_i", "11", "a_f", "7");
  indexr(id, "7", "level1_s", "hello3", "level2_s", "b", "a_i", "12", "a_f", "8");
  indexr(id, "8", "level1_s", "hello3", "level2_s", "b", "a_i", "13", "a_f", "9");
  indexr(id, "9", "level1_s", "hello0", "level2_s", "b", "a_i", "14", "a_f", "10");

  commit();

  String clause;
  List<Tuple> tuples;

  StreamFactory factory = new StreamFactory()
    .withCollectionZkHost("collection1", zkServer.getZkAddress())
    .withFunctionName("facet", FacetStream.class)
    .withFunctionName("sum", SumMetric.class)
    .withFunctionName("min", MinMetric.class)
    .withFunctionName("max", MaxMetric.class)
    .withFunctionName("avg", MeanMetric.class)
    .withFunctionName("count", CountMetric.class);

  // Metric sort, descending at both levels
  clause = "facet("
          +   "collection1, "
          +   "q=\"*:*\", "
          +   "buckets=\"level1_s, level2_s\", "
          +   "bucketSorts=\"sum(a_i) desc, sum(a_i) desc\", "
          +   "bucketSizeLimit=100, "
          +   "sum(a_i), count(*)"
          + ")";

  tuples = getTuples(factory.constructStream(clause));
  assertTrue("expected 6 buckets but found " + tuples.size(), tuples.size() == 6);
  assertSubFacetTuple(tuples.get(0), "hello3", "b", 35L, 3.0D);
  assertSubFacetTuple(tuples.get(1), "hello0", "b", 15L, 2.0D);
  assertSubFacetTuple(tuples.get(2), "hello4", "b", 11L, 1.0D);
  assertSubFacetTuple(tuples.get(3), "hello4", "a", 4L, 1.0D);
  assertSubFacetTuple(tuples.get(4), "hello3", "a", 3L, 1.0D);
  assertSubFacetTuple(tuples.get(5), "hello0", "a", 2L, 2.0D);

  // Index sort, descending at both levels
  clause = "facet("
          +   "collection1, "
          +   "q=\"*:*\", "
          +   "buckets=\"level1_s, level2_s\", "
          +   "bucketSorts=\"level1_s desc, level2_s desc\", "
          +   "bucketSizeLimit=100, "
          +   "sum(a_i), count(*)"
          + ")";

  tuples = getTuples(factory.constructStream(clause));
  assertTrue("expected 6 buckets but found " + tuples.size(), tuples.size() == 6);
  assertSubFacetTuple(tuples.get(0), "hello4", "b", 11L, 1.0D);
  assertSubFacetTuple(tuples.get(1), "hello4", "a", 4L, 1.0D);
  assertSubFacetTuple(tuples.get(2), "hello3", "b", 35L, 3.0D);
  assertSubFacetTuple(tuples.get(3), "hello3", "a", 3L, 1.0D);
  assertSubFacetTuple(tuples.get(4), "hello0", "b", 15L, 2.0D);
  assertSubFacetTuple(tuples.get(5), "hello0", "a", 2L, 2.0D);

  del("*:*");
  commit();
}

/**
 * Asserts the level1_s/level2_s bucket values of a sub-facet tuple together with its
 * sum(a_i) (compared as a long) and count(*) (compared as a double).
 */
private void assertSubFacetTuple(Tuple tuple, String expectedLevel1, String expectedLevel2, long expectedSum, double expectedCount) throws Exception {
  String bucket1 = tuple.getString("level1_s");
  String bucket2 = tuple.getString("level2_s");
  Double sumi = tuple.getDouble("sum(a_i)");
  Double count = tuple.getDouble("count(*)");

  String label = expectedLevel1 + "/" + expectedLevel2;
  assertTrue("expected level1_s " + expectedLevel1 + " but found " + bucket1, expectedLevel1.equals(bucket1));
  assertTrue("expected level2_s " + expectedLevel2 + " but found " + bucket2, expectedLevel2.equals(bucket2));
  assertTrue(label + ": expected sum(a_i)=" + expectedSum + " but found " + sumi,
      null != sumi && sumi.longValue() == expectedSum);
  assertTrue(label + ": expected count(*)=" + expectedCount + " but found " + count,
      null != count && count.doubleValue() == expectedCount);
}
protected List<Tuple> getTuples(TupleStream tupleStream) throws IOException {
tupleStream.open();
List<Tuple> tuples = new ArrayList<Tuple>();

View File

@ -47,7 +47,8 @@ public class StreamExpressionToExpessionTest extends LuceneTestCase {
.withFunctionName("top", RankStream.class)
.withFunctionName("reduce", ReducerStream.class)
.withFunctionName("group", GroupOperation.class)
.withFunctionName("stats", StatsStream.class)
.withFunctionName("stats", StatsStream.class)
.withFunctionName("facet", FacetStream.class)
.withFunctionName("count", CountMetric.class)
.withFunctionName("sum", SumMetric.class)
.withFunctionName("min", MinMetric.class)
@ -163,6 +164,42 @@ public class StreamExpressionToExpessionTest extends LuceneTestCase {
assertTrue(expressionString.contains("by=a_s"));
}
/**
 * Parses a facet(...) expression into a FacetStream, renders it back with toExpression and
 * checks that every operand of the original expression shows up in the rendered string.
 */
@Test
public void testFacetStream() throws Exception {

  // Basic test
  final String clause = "facet("
                      +   "collection1, "
                      +   "q=\"*:*\", "
                      +   "buckets=\"a_s\", "
                      +   "bucketSorts=\"sum(a_i) asc\", "
                      +   "bucketSizeLimit=100, "
                      +   "sum(a_i), sum(a_f), "
                      +   "min(a_i), min(a_f), "
                      +   "max(a_i), max(a_f), "
                      +   "avg(a_i), avg(a_f), "
                      +   "count(*)"
                      + ")";

  final FacetStream facetStream = new FacetStream(StreamExpressionParser.parse(clause), factory);
  final String rendered = facetStream.toExpression(factory).toString();

  // fragments are checked in the same order the operands were written above
  final String[] expectedFragments = {
      "facet(collection1",
      "q=\"*:*\"",
      "buckets=a_s",
      "bucketSorts=\"sum(a_i) asc\"",
      "bucketSizeLimit=100",
      "sum(a_i)",
      "sum(a_f)",
      "min(a_i)",
      "min(a_f)",
      "max(a_i)",
      "max(a_f)",
      "avg(a_i)",
      "avg(a_f)",
      "count(*)"};

  for(String fragment : expectedFragments){
    assertTrue(rendered.contains(fragment));
  }
}
@Test
public void testCountMetric() throws Exception {