mirror of https://github.com/apache/lucene.git
SOLR-10754: Add hist Stream Evaluator
This commit is contained in:
parent
3e70745c79
commit
d1436c4823
|
@ -191,6 +191,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
|
||||||
.withFunctionName("finddelay", FindDelayEvaluator.class)
|
.withFunctionName("finddelay", FindDelayEvaluator.class)
|
||||||
.withFunctionName("sequence", SequenceEvaluator.class)
|
.withFunctionName("sequence", SequenceEvaluator.class)
|
||||||
.withFunctionName("array", ArrayEvaluator.class)
|
.withFunctionName("array", ArrayEvaluator.class)
|
||||||
|
.withFunctionName("hist", HistogramEvaluator.class)
|
||||||
|
|
||||||
// metrics
|
// metrics
|
||||||
.withFunctionName("min", MinMetric.class)
|
.withFunctionName("min", MinMetric.class)
|
||||||
|
|
|
@ -0,0 +1,98 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.client.solrj.io.eval;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.math3.random.EmpiricalDistribution;
|
||||||
|
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
|
||||||
|
import org.apache.solr.client.solrj.io.Tuple;
|
||||||
|
import org.apache.solr.client.solrj.io.stream.expr.Explanation;
|
||||||
|
import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
|
||||||
|
import org.apache.solr.client.solrj.io.stream.expr.Expressible;
|
||||||
|
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
|
||||||
|
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter;
|
||||||
|
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
|
||||||
|
|
||||||
|
public class HistogramEvaluator extends ComplexEvaluator implements Expressible {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = 1;
|
||||||
|
|
||||||
|
public HistogramEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
|
||||||
|
super(expression, factory);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Map> evaluate(Tuple tuple) throws IOException {
|
||||||
|
|
||||||
|
StreamEvaluator colEval1 = subEvaluators.get(0);
|
||||||
|
|
||||||
|
List<Number> numbers1 = (List<Number>)colEval1.evaluate(tuple);
|
||||||
|
double[] column1 = new double[numbers1.size()];
|
||||||
|
|
||||||
|
for(int i=0; i<numbers1.size(); i++) {
|
||||||
|
column1[i] = numbers1.get(i).doubleValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
int bins = 10;
|
||||||
|
if(subEvaluators.size() == 2) {
|
||||||
|
StreamEvaluator binsEval = subEvaluators.get(1);
|
||||||
|
Number binsNum = (Number) binsEval.evaluate(tuple);
|
||||||
|
bins = binsNum.intValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
EmpiricalDistribution empiricalDistribution = new EmpiricalDistribution(bins);
|
||||||
|
empiricalDistribution.load(column1);
|
||||||
|
|
||||||
|
List<Map> binList = new ArrayList();
|
||||||
|
|
||||||
|
List<SummaryStatistics> summaries = empiricalDistribution.getBinStats();
|
||||||
|
for(SummaryStatistics statisticalSummary : summaries) {
|
||||||
|
Map map = new HashMap();
|
||||||
|
map.put("max", statisticalSummary.getMax());
|
||||||
|
map.put("mean", statisticalSummary.getMean());
|
||||||
|
map.put("min", statisticalSummary.getMin());
|
||||||
|
map.put("stdev", statisticalSummary.getStandardDeviation());
|
||||||
|
map.put("sum", statisticalSummary.getSum());
|
||||||
|
map.put("N", statisticalSummary.getN());
|
||||||
|
map.put("var", statisticalSummary.getVariance());
|
||||||
|
binList.add(map);
|
||||||
|
}
|
||||||
|
|
||||||
|
return binList;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException {
|
||||||
|
StreamExpression expression = new StreamExpression(factory.getFunctionName(getClass()));
|
||||||
|
return expression;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Explanation toExplanation(StreamFactory factory) throws IOException {
|
||||||
|
return new Explanation(nodeId.toString())
|
||||||
|
.withExpressionType(ExpressionType.EVALUATOR)
|
||||||
|
.withFunctionName(factory.getFunctionName(getClass()))
|
||||||
|
.withImplementingClass(getClass().getName())
|
||||||
|
.withExpression(toExpression(factory).toString());
|
||||||
|
}
|
||||||
|
}
|
|
@ -5227,6 +5227,53 @@ public class StreamExpressionTest extends SolrCloudTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHist() throws Exception {
|
||||||
|
String expr = "hist(sequence(100, 0, 1), 10)";
|
||||||
|
ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
|
||||||
|
paramsLoc.set("expr", expr);
|
||||||
|
paramsLoc.set("qt", "/stream");
|
||||||
|
|
||||||
|
String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
|
||||||
|
TupleStream solrStream = new SolrStream(url, paramsLoc);
|
||||||
|
|
||||||
|
StreamContext context = new StreamContext();
|
||||||
|
solrStream.setStreamContext(context);
|
||||||
|
List<Tuple> tuples = getTuples(solrStream);
|
||||||
|
assertTrue(tuples.size() == 1);
|
||||||
|
List<Map> hist = (List<Map>)tuples.get(0).get("return-value");
|
||||||
|
assertTrue(hist.size() == 10);
|
||||||
|
for(int i=0; i<hist.size(); i++) {
|
||||||
|
Map stats = hist.get(i);
|
||||||
|
assertTrue(((Number)stats.get("N")).intValue() == 10);
|
||||||
|
assertTrue(((Number)stats.get("min")).intValue() == 10*i);
|
||||||
|
assertTrue(((Number)stats.get("var")).doubleValue() == 9.166666666666666);
|
||||||
|
assertTrue(((Number)stats.get("stdev")).doubleValue() == 3.0276503540974917);
|
||||||
|
}
|
||||||
|
|
||||||
|
expr = "hist(sequence(100, 0, 1), 5)";
|
||||||
|
paramsLoc = new ModifiableSolrParams();
|
||||||
|
paramsLoc.set("expr", expr);
|
||||||
|
paramsLoc.set("qt", "/stream");
|
||||||
|
|
||||||
|
solrStream = new SolrStream(url, paramsLoc);
|
||||||
|
solrStream.setStreamContext(context);
|
||||||
|
tuples = getTuples(solrStream);
|
||||||
|
assertTrue(tuples.size() == 1);
|
||||||
|
hist = (List<Map>)tuples.get(0).get("return-value");
|
||||||
|
assertTrue(hist.size() == 5);
|
||||||
|
for(int i=0; i<hist.size(); i++) {
|
||||||
|
Map stats = hist.get(i);
|
||||||
|
assertTrue(((Number)stats.get("N")).intValue() == 20);
|
||||||
|
assertTrue(((Number)stats.get("min")).intValue() == 20*i);
|
||||||
|
assertTrue(((Number)stats.get("var")).doubleValue() == 35);
|
||||||
|
assertTrue(((Number)stats.get("stdev")).doubleValue() == 5.916079783099616);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -5747,7 +5794,7 @@ public class StreamExpressionTest extends SolrCloudTestCase {
|
||||||
solrStream.setStreamContext(context);
|
solrStream.setStreamContext(context);
|
||||||
List<Tuple> tuples = getTuples(solrStream);
|
List<Tuple> tuples = getTuples(solrStream);
|
||||||
assertTrue(tuples.size() == 1);
|
assertTrue(tuples.size() == 1);
|
||||||
List<Number> out = (List<Number>)tuples.get(0).get("out");
|
List<Number> out = (List<Number>)tuples.get(0).get("return-value");
|
||||||
assertTrue(out.size() == 6);
|
assertTrue(out.size() == 6);
|
||||||
assertTrue(out.get(0).intValue() == 1);
|
assertTrue(out.get(0).intValue() == 1);
|
||||||
assertTrue(out.get(1).intValue() == 2);
|
assertTrue(out.get(1).intValue() == 2);
|
||||||
|
@ -5764,7 +5811,7 @@ public class StreamExpressionTest extends SolrCloudTestCase {
|
||||||
solrStream.setStreamContext(context);
|
solrStream.setStreamContext(context);
|
||||||
tuples = getTuples(solrStream);
|
tuples = getTuples(solrStream);
|
||||||
assertTrue(tuples.size() == 1);
|
assertTrue(tuples.size() == 1);
|
||||||
out = (List<Number>)tuples.get(0).get("out");
|
out = (List<Number>)tuples.get(0).get("return-value");
|
||||||
assertTrue(out.size() == 6);
|
assertTrue(out.size() == 6);
|
||||||
assertTrue(out.get(0).doubleValue() == 1.122D);
|
assertTrue(out.get(0).doubleValue() == 1.122D);
|
||||||
assertTrue(out.get(1).doubleValue() == 2.222D);
|
assertTrue(out.get(1).doubleValue() == 2.222D);
|
||||||
|
|
Loading…
Reference in New Issue