SOLR-10754: Add hist Stream Evaluator

This commit is contained in:
Joel Bernstein 2017-05-26 13:41:35 -04:00
parent 3e70745c79
commit d1436c4823
3 changed files with 148 additions and 2 deletions

View File

@ -191,6 +191,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("finddelay", FindDelayEvaluator.class) .withFunctionName("finddelay", FindDelayEvaluator.class)
.withFunctionName("sequence", SequenceEvaluator.class) .withFunctionName("sequence", SequenceEvaluator.class)
.withFunctionName("array", ArrayEvaluator.class) .withFunctionName("array", ArrayEvaluator.class)
.withFunctionName("hist", HistogramEvaluator.class)
// metrics // metrics
.withFunctionName("min", MinMetric.class) .withFunctionName("min", MinMetric.class)

View File

@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.eval;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.math3.random.EmpiricalDistribution;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.stream.expr.Explanation;
import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
import org.apache.solr.client.solrj.io.stream.expr.Expressible;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
/**
 * Evaluator that bins a list of numbers into a histogram.
 *
 * <p>Parameters (sub-evaluators):
 * <ol>
 *   <li>an expression that evaluates to a {@code List} of {@code Number}s (required)</li>
 *   <li>an expression that evaluates to the number of bins (optional, defaults to
 *       {@value #DEFAULT_BINS})</li>
 * </ol>
 *
 * <p>The binning itself is delegated to Commons Math {@link EmpiricalDistribution}. Each bin is
 * returned as a map holding the summary statistics of the values that fell into it, under the
 * keys {@code max}, {@code mean}, {@code min}, {@code stdev}, {@code sum}, {@code N} and
 * {@code var}.
 */
public class HistogramEvaluator extends ComplexEvaluator implements Expressible {

  private static final long serialVersionUID = 1;

  /** Number of bins used when the expression does not supply a bin count. */
  private static final int DEFAULT_BINS = 10;

  public HistogramEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
    super(expression, factory);
  }

  /**
   * Evaluates the sub-expressions against {@code tuple} and builds the histogram.
   *
   * @param tuple the tuple handed to each sub-evaluator
   * @return one map of summary statistics per bin, in the order reported by
   *         {@link EmpiricalDistribution#getBinStats()}
   * @throws IOException if the number of parameters is not 1 or 2, if the first parameter does
   *         not evaluate to a list of numbers, if the bin count does not evaluate to a positive
   *         number, or if a sub-evaluator fails
   */
  public List<Map> evaluate(Tuple tuple) throws IOException {
    int paramCount = subEvaluators.size();
    if (paramCount < 1 || paramCount > 2) {
      // Previously zero parameters failed with IndexOutOfBoundsException and three or more
      // parameters silently ignored the requested bin count.
      throw new IOException("Invalid hist expression - expecting a list of numbers and an optional"
          + " bin count but found " + paramCount + " parameter(s)");
    }

    double[] column = toDoubleArray(subEvaluators.get(0).evaluate(tuple));

    int bins = DEFAULT_BINS;
    if (paramCount == 2) {
      bins = toBinCount(subEvaluators.get(1).evaluate(tuple));
    }

    EmpiricalDistribution empiricalDistribution = new EmpiricalDistribution(bins);
    empiricalDistribution.load(column);

    List<SummaryStatistics> summaries = empiricalDistribution.getBinStats();
    List<Map> binList = new ArrayList<>(summaries.size());
    for (SummaryStatistics summary : summaries) {
      Map<String, Number> binStats = new HashMap<>();
      binStats.put("max", summary.getMax());
      binStats.put("mean", summary.getMean());
      binStats.put("min", summary.getMin());
      binStats.put("stdev", summary.getStandardDeviation());
      binStats.put("sum", summary.getSum());
      binStats.put("N", summary.getN());
      binStats.put("var", summary.getVariance());
      binList.add(binStats);
    }

    return binList;
  }

  /** Converts the evaluated first parameter into a primitive array, rejecting anything that is not a list of numbers. */
  private static double[] toDoubleArray(Object value) throws IOException {
    if (!(value instanceof List)) {
      throw new IOException("Invalid hist expression - first parameter must evaluate to a list of"
          + " numbers but found " + (value == null ? "null" : value.getClass().getName()));
    }

    List<?> values = (List<?>) value;
    double[] column = new double[values.size()];
    for (int i = 0; i < column.length; i++) {
      Object element = values.get(i);
      if (!(element instanceof Number)) {
        throw new IOException("Invalid hist expression - list element " + i + " is not a number: "
            + element);
      }
      column[i] = ((Number) element).doubleValue();
    }
    return column;
  }

  /** Converts the evaluated second parameter into a bin count, rejecting non-numeric and non-positive values. */
  private static int toBinCount(Object value) throws IOException {
    if (!(value instanceof Number)) {
      throw new IOException("Invalid hist expression - bin count must evaluate to a number but"
          + " found " + (value == null ? "null" : value.getClass().getName()));
    }

    int bins = ((Number) value).intValue();
    if (bins <= 0) {
      // EmpiricalDistribution rejects non-positive bin counts with an unchecked exception;
      // report it up front as a descriptive IOException instead.
      throw new IOException("Invalid hist expression - bin count must be greater than 0 but found "
          + bins);
    }
    return bins;
  }

  /**
   * Rebuilds the expression for this evaluator: the registered function name followed by the
   * expression of every sub-evaluator, in order.
   */
  @Override
  public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException {
    StreamExpression expression = new StreamExpression(factory.getFunctionName(getClass()));

    // Include the parameters so the expression (and the explanation derived from it) is complete.
    for (StreamEvaluator evaluator : subEvaluators) {
      expression.addParameter(evaluator.toExpression(factory));
    }

    return expression;
  }

  /** Describes this evaluator: node id, function name, implementing class and full expression. */
  @Override
  public Explanation toExplanation(StreamFactory factory) throws IOException {
    return new Explanation(nodeId.toString())
        .withExpressionType(ExpressionType.EVALUATOR)
        .withFunctionName(factory.getFunctionName(getClass()))
        .withImplementingClass(getClass().getName())
        .withExpression(toExpression(factory).toString());
  }
}

View File

@ -5227,6 +5227,53 @@ public class StreamExpressionTest extends SolrCloudTestCase {
} }
} }
@Test
public void testHist() throws Exception {
  // Table of cases; index c selects the expression and the values expected for it.
  String[] expressions = {"hist(sequence(100, 0, 1), 10)", "hist(sequence(100, 0, 1), 5)"};
  int[] expectedBinCount = {10, 5};
  int[] expectedPerBin = {10, 20};
  double[] expectedVar = {9.166666666666666, 35};
  double[] expectedStdev = {3.0276503540974917, 5.916079783099616};

  // Every request goes to the first Jetty node and reuses the same stream context.
  String baseUrl = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
  StreamContext sharedContext = new StreamContext();

  for(int c=0; c<expressions.length; c++) {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("expr", expressions[c]);
    params.set("qt", "/stream");

    TupleStream stream = new SolrStream(baseUrl, params);
    stream.setStreamContext(sharedContext);

    // The histogram comes back as a single tuple whose "return-value" holds one map per bin.
    List<Tuple> results = getTuples(stream);
    assertTrue(results.size() == 1);
    List<Map> bins = (List<Map>)results.get(0).get("return-value");
    assertTrue(bins.size() == expectedBinCount[c]);

    for(int b=0; b<bins.size(); b++) {
      Map binStats = bins.get(b);
      Number count = (Number)binStats.get("N");
      Number min = (Number)binStats.get("min");
      Number var = (Number)binStats.get("var");
      Number stdev = (Number)binStats.get("stdev");

      // Bins are expected to be equally populated, with bin b starting at b * (values per bin).
      assertTrue(count.intValue() == expectedPerBin[c]);
      assertTrue(min.intValue() == expectedPerBin[c]*b);
      assertTrue(var.doubleValue() == expectedVar[c]);
      assertTrue(stdev.doubleValue() == expectedStdev[c]);
    }
  }
}
@Test @Test
@ -5747,7 +5794,7 @@ public class StreamExpressionTest extends SolrCloudTestCase {
solrStream.setStreamContext(context); solrStream.setStreamContext(context);
List<Tuple> tuples = getTuples(solrStream); List<Tuple> tuples = getTuples(solrStream);
assertTrue(tuples.size() == 1); assertTrue(tuples.size() == 1);
List<Number> out = (List<Number>)tuples.get(0).get("out"); List<Number> out = (List<Number>)tuples.get(0).get("return-value");
assertTrue(out.size() == 6); assertTrue(out.size() == 6);
assertTrue(out.get(0).intValue() == 1); assertTrue(out.get(0).intValue() == 1);
assertTrue(out.get(1).intValue() == 2); assertTrue(out.get(1).intValue() == 2);
@ -5764,7 +5811,7 @@ public class StreamExpressionTest extends SolrCloudTestCase {
solrStream.setStreamContext(context); solrStream.setStreamContext(context);
tuples = getTuples(solrStream); tuples = getTuples(solrStream);
assertTrue(tuples.size() == 1); assertTrue(tuples.size() == 1);
out = (List<Number>)tuples.get(0).get("out"); out = (List<Number>)tuples.get(0).get("return-value");
assertTrue(out.size() == 6); assertTrue(out.size() == 6);
assertTrue(out.get(0).doubleValue() == 1.122D); assertTrue(out.get(0).doubleValue() == 1.122D);
assertTrue(out.get(1).doubleValue() == 2.222D); assertTrue(out.get(1).doubleValue() == 2.222D);