mirror of https://github.com/apache/lucene.git
SOLR-10754: Add hist Stream Evaluator
This commit is contained in:
parent
3e70745c79
commit
d1436c4823
|
@ -191,6 +191,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
|
|||
.withFunctionName("finddelay", FindDelayEvaluator.class)
|
||||
.withFunctionName("sequence", SequenceEvaluator.class)
|
||||
.withFunctionName("array", ArrayEvaluator.class)
|
||||
.withFunctionName("hist", HistogramEvaluator.class)
|
||||
|
||||
// metrics
|
||||
.withFunctionName("min", MinMetric.class)
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.client.solrj.io.eval;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.math3.random.EmpiricalDistribution;
|
||||
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
|
||||
import org.apache.solr.client.solrj.io.Tuple;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.Explanation;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.Expressible;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
|
||||
|
||||
public class HistogramEvaluator extends ComplexEvaluator implements Expressible {
|
||||
|
||||
private static final long serialVersionUID = 1;
|
||||
|
||||
public HistogramEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
|
||||
super(expression, factory);
|
||||
}
|
||||
|
||||
public List<Map> evaluate(Tuple tuple) throws IOException {
|
||||
|
||||
StreamEvaluator colEval1 = subEvaluators.get(0);
|
||||
|
||||
List<Number> numbers1 = (List<Number>)colEval1.evaluate(tuple);
|
||||
double[] column1 = new double[numbers1.size()];
|
||||
|
||||
for(int i=0; i<numbers1.size(); i++) {
|
||||
column1[i] = numbers1.get(i).doubleValue();
|
||||
}
|
||||
|
||||
int bins = 10;
|
||||
if(subEvaluators.size() == 2) {
|
||||
StreamEvaluator binsEval = subEvaluators.get(1);
|
||||
Number binsNum = (Number) binsEval.evaluate(tuple);
|
||||
bins = binsNum.intValue();
|
||||
}
|
||||
|
||||
EmpiricalDistribution empiricalDistribution = new EmpiricalDistribution(bins);
|
||||
empiricalDistribution.load(column1);
|
||||
|
||||
List<Map> binList = new ArrayList();
|
||||
|
||||
List<SummaryStatistics> summaries = empiricalDistribution.getBinStats();
|
||||
for(SummaryStatistics statisticalSummary : summaries) {
|
||||
Map map = new HashMap();
|
||||
map.put("max", statisticalSummary.getMax());
|
||||
map.put("mean", statisticalSummary.getMean());
|
||||
map.put("min", statisticalSummary.getMin());
|
||||
map.put("stdev", statisticalSummary.getStandardDeviation());
|
||||
map.put("sum", statisticalSummary.getSum());
|
||||
map.put("N", statisticalSummary.getN());
|
||||
map.put("var", statisticalSummary.getVariance());
|
||||
binList.add(map);
|
||||
}
|
||||
|
||||
return binList;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException {
|
||||
StreamExpression expression = new StreamExpression(factory.getFunctionName(getClass()));
|
||||
return expression;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation toExplanation(StreamFactory factory) throws IOException {
|
||||
return new Explanation(nodeId.toString())
|
||||
.withExpressionType(ExpressionType.EVALUATOR)
|
||||
.withFunctionName(factory.getFunctionName(getClass()))
|
||||
.withImplementingClass(getClass().getName())
|
||||
.withExpression(toExpression(factory).toString());
|
||||
}
|
||||
}
|
|
@ -5227,6 +5227,53 @@ public class StreamExpressionTest extends SolrCloudTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHist() throws Exception {
|
||||
String expr = "hist(sequence(100, 0, 1), 10)";
|
||||
ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
|
||||
paramsLoc.set("expr", expr);
|
||||
paramsLoc.set("qt", "/stream");
|
||||
|
||||
String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
|
||||
TupleStream solrStream = new SolrStream(url, paramsLoc);
|
||||
|
||||
StreamContext context = new StreamContext();
|
||||
solrStream.setStreamContext(context);
|
||||
List<Tuple> tuples = getTuples(solrStream);
|
||||
assertTrue(tuples.size() == 1);
|
||||
List<Map> hist = (List<Map>)tuples.get(0).get("return-value");
|
||||
assertTrue(hist.size() == 10);
|
||||
for(int i=0; i<hist.size(); i++) {
|
||||
Map stats = hist.get(i);
|
||||
assertTrue(((Number)stats.get("N")).intValue() == 10);
|
||||
assertTrue(((Number)stats.get("min")).intValue() == 10*i);
|
||||
assertTrue(((Number)stats.get("var")).doubleValue() == 9.166666666666666);
|
||||
assertTrue(((Number)stats.get("stdev")).doubleValue() == 3.0276503540974917);
|
||||
}
|
||||
|
||||
expr = "hist(sequence(100, 0, 1), 5)";
|
||||
paramsLoc = new ModifiableSolrParams();
|
||||
paramsLoc.set("expr", expr);
|
||||
paramsLoc.set("qt", "/stream");
|
||||
|
||||
solrStream = new SolrStream(url, paramsLoc);
|
||||
solrStream.setStreamContext(context);
|
||||
tuples = getTuples(solrStream);
|
||||
assertTrue(tuples.size() == 1);
|
||||
hist = (List<Map>)tuples.get(0).get("return-value");
|
||||
assertTrue(hist.size() == 5);
|
||||
for(int i=0; i<hist.size(); i++) {
|
||||
Map stats = hist.get(i);
|
||||
assertTrue(((Number)stats.get("N")).intValue() == 20);
|
||||
assertTrue(((Number)stats.get("min")).intValue() == 20*i);
|
||||
assertTrue(((Number)stats.get("var")).doubleValue() == 35);
|
||||
assertTrue(((Number)stats.get("stdev")).doubleValue() == 5.916079783099616);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
|
@ -5747,7 +5794,7 @@ public class StreamExpressionTest extends SolrCloudTestCase {
|
|||
solrStream.setStreamContext(context);
|
||||
List<Tuple> tuples = getTuples(solrStream);
|
||||
assertTrue(tuples.size() == 1);
|
||||
List<Number> out = (List<Number>)tuples.get(0).get("out");
|
||||
List<Number> out = (List<Number>)tuples.get(0).get("return-value");
|
||||
assertTrue(out.size() == 6);
|
||||
assertTrue(out.get(0).intValue() == 1);
|
||||
assertTrue(out.get(1).intValue() == 2);
|
||||
|
@ -5764,7 +5811,7 @@ public class StreamExpressionTest extends SolrCloudTestCase {
|
|||
solrStream.setStreamContext(context);
|
||||
tuples = getTuples(solrStream);
|
||||
assertTrue(tuples.size() == 1);
|
||||
out = (List<Number>)tuples.get(0).get("out");
|
||||
out = (List<Number>)tuples.get(0).get("return-value");
|
||||
assertTrue(out.size() == 6);
|
||||
assertTrue(out.get(0).doubleValue() == 1.122D);
|
||||
assertTrue(out.get(1).doubleValue() == 2.222D);
|
||||
|
|
Loading…
Reference in New Issue