SOLR-11377: Add expMovingAvg (exponential moving average) Stream Evaluator

This commit is contained in:
Joel Bernstein 2017-10-08 20:55:29 -04:00
parent b9c8aa7059
commit aa6ec0ee54
4 changed files with 207 additions and 54 deletions

View File

@ -16,9 +16,6 @@
*/
package org.apache.solr.handler;
import static org.apache.solr.common.params.CommonParams.ID;
import static org.apache.solr.common.params.CommonParams.SORT;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
@ -40,56 +37,7 @@ import org.apache.solr.client.solrj.io.ops.ConcatOperation;
import org.apache.solr.client.solrj.io.ops.DistinctOperation;
import org.apache.solr.client.solrj.io.ops.GroupOperation;
import org.apache.solr.client.solrj.io.ops.ReplaceOperation;
import org.apache.solr.client.solrj.io.stream.CalculatorStream;
import org.apache.solr.client.solrj.io.stream.CartesianProductStream;
import org.apache.solr.client.solrj.io.stream.CellStream;
import org.apache.solr.client.solrj.io.stream.CloudSolrStream;
import org.apache.solr.client.solrj.io.stream.CommitStream;
import org.apache.solr.client.solrj.io.stream.ComplementStream;
import org.apache.solr.client.solrj.io.stream.DaemonStream;
import org.apache.solr.client.solrj.io.stream.EchoStream;
import org.apache.solr.client.solrj.io.stream.EvalStream;
import org.apache.solr.client.solrj.io.stream.ExceptionStream;
import org.apache.solr.client.solrj.io.stream.ExecutorStream;
import org.apache.solr.client.solrj.io.stream.FacetStream;
import org.apache.solr.client.solrj.io.stream.FeaturesSelectionStream;
import org.apache.solr.client.solrj.io.stream.FetchStream;
import org.apache.solr.client.solrj.io.stream.GetStream;
import org.apache.solr.client.solrj.io.stream.HashJoinStream;
import org.apache.solr.client.solrj.io.stream.HavingStream;
import org.apache.solr.client.solrj.io.stream.InnerJoinStream;
import org.apache.solr.client.solrj.io.stream.IntersectStream;
import org.apache.solr.client.solrj.io.stream.JDBCStream;
import org.apache.solr.client.solrj.io.stream.KnnStream;
import org.apache.solr.client.solrj.io.stream.LeftOuterJoinStream;
import org.apache.solr.client.solrj.io.stream.LetStream;
import org.apache.solr.client.solrj.io.stream.ListStream;
import org.apache.solr.client.solrj.io.stream.MergeStream;
import org.apache.solr.client.solrj.io.stream.ModelStream;
import org.apache.solr.client.solrj.io.stream.NullStream;
import org.apache.solr.client.solrj.io.stream.OuterHashJoinStream;
import org.apache.solr.client.solrj.io.stream.ParallelStream;
import org.apache.solr.client.solrj.io.stream.PlotStream;
import org.apache.solr.client.solrj.io.stream.PriorityStream;
import org.apache.solr.client.solrj.io.stream.RandomStream;
import org.apache.solr.client.solrj.io.stream.RankStream;
import org.apache.solr.client.solrj.io.stream.ReducerStream;
import org.apache.solr.client.solrj.io.stream.RollupStream;
import org.apache.solr.client.solrj.io.stream.ScoreNodesStream;
import org.apache.solr.client.solrj.io.stream.SelectStream;
import org.apache.solr.client.solrj.io.stream.ShuffleStream;
import org.apache.solr.client.solrj.io.stream.SignificantTermsStream;
import org.apache.solr.client.solrj.io.stream.SortStream;
import org.apache.solr.client.solrj.io.stream.SqlStream;
import org.apache.solr.client.solrj.io.stream.StatsStream;
import org.apache.solr.client.solrj.io.stream.StreamContext;
import org.apache.solr.client.solrj.io.stream.TextLogitStream;
import org.apache.solr.client.solrj.io.stream.TimeSeriesStream;
import org.apache.solr.client.solrj.io.stream.TopicStream;
import org.apache.solr.client.solrj.io.stream.TupStream;
import org.apache.solr.client.solrj.io.stream.TupleStream;
import org.apache.solr.client.solrj.io.stream.UniqueStream;
import org.apache.solr.client.solrj.io.stream.UpdateStream;
import org.apache.solr.client.solrj.io.stream.*;
import org.apache.solr.client.solrj.io.stream.expr.Explanation;
import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
import org.apache.solr.client.solrj.io.stream.expr.Expressible;
@ -119,6 +67,9 @@ import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.common.params.CommonParams.ID;
import static org.apache.solr.common.params.CommonParams.SORT;
public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, PermissionNameProvider {
static SolrClientCache clientCache = new SolrClientCache();
@ -299,6 +250,8 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("primes", PrimesEvaluator.class)
.withFunctionName("factorial", FactorialEvaluator.class)
.withFunctionName("movingMedian", MovingMedianEvaluator.class)
.withFunctionName("binomialCoefficient", BinomialCoefficientEvaluator.class)
.withFunctionName("expMovingAvg", ExponentialMovingAverageEvaluator.class)
.withFunctionName("monteCarlo", MonteCarloEvaluator.class)
.withFunctionName("constantDistribution", ConstantDistributionEvaluator.class)
.withFunctionName("weibullDistribution", WeibullDistributionEvaluator.class)

View File

@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.eval;
import java.io.IOException;
import java.util.Locale;
import org.apache.commons.math3.util.*;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
/**
 * Stream evaluator that returns the binomial coefficient of two numeric operands.
 *
 * <p>The expression must contain exactly two sub-expressions. At evaluation time both
 * values are narrowed with {@link Number#intValue()} and handed to
 * {@code CombinatoricsUtils.binomialCoefficient(int, int)}: the first value is the set
 * size and the second is the subset size.
 */
public class BinomialCoefficientEvaluator extends RecursiveNumericEvaluator implements ManyValueWorker {
  protected static final long serialVersionUID = 1L;

  /**
   * Builds the evaluator from a parsed expression.
   *
   * @param expression the expression being parsed
   * @param factory the factory used to construct contained evaluators
   * @throws IOException if the expression does not contain exactly two values
   */
  public BinomialCoefficientEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
    super(expression, factory);

    if (containedEvaluators.size() != 2) {
      String message = String.format(
          Locale.ROOT,
          "Invalid expression %s - expecting two values but found %d",
          expression,
          containedEvaluators.size());
      throw new IOException(message);
    }
  }

  /**
   * Computes the binomial coefficient for the two evaluated operands.
   *
   * @param values the evaluated operands: set size first, subset size second
   * @return the binomial coefficient as returned by {@code CombinatoricsUtils}
   * @throws IOException if the number of values is not exactly two
   */
  @Override
  public Object doWork(Object... values) throws IOException {
    if (values.length != 2) {
      String message = String.format(
          Locale.ROOT,
          "%s(...) only works with 2 values but %d were provided",
          constructingFactory.getFunctionName(getClass()),
          values.length);
      throw new IOException(message);
    }

    // Operands are narrowed to int one after the other, first operand first.
    final int setSize = ((Number) values[0]).intValue();
    final int subsetSize = ((Number) values[1]).intValue();

    return CombinatoricsUtils.binomialCoefficient(setSize, subsetSize);
  }
}

View File

@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.eval;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
/**
 * Stream evaluator that computes an exponential moving average (EMA) over a list of
 * numeric observations.
 *
 * <p>Operands, in order:
 * <ol>
 *   <li>a non-empty {@link List} of numeric observations;</li>
 *   <li>the window size, a {@link Number} whose integer value is at least 1 and no larger
 *       than the list size;</li>
 *   <li>(optional) the smoothing factor alpha in the range [0, 1]. When omitted it defaults
 *       to {@code 2 / (window + 1)}.</li>
 * </ol>
 *
 * <p>The first output value is the arithmetic mean of the first {@code window} observations.
 * Each remaining observation {@code x} then yields {@code alpha * (x - previous) + previous}.
 * The result therefore holds {@code observations.size() - window + 1} values.
 */
public class ExponentialMovingAverageEvaluator extends RecursiveNumericEvaluator implements ManyValueWorker {
  protected static final long serialVersionUID = 1L;

  /**
   * Builds the evaluator from a parsed expression.
   *
   * @param expression the expression being parsed
   * @param factory the factory used to construct contained evaluators
   * @throws IOException if the expression does not contain two or three values
   */
  public ExponentialMovingAverageEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
    super(expression, factory);

    if (!(2 == containedEvaluators.size() || containedEvaluators.size() == 3)) {
      throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - expecting two or three values but found %d", expression, containedEvaluators.size()));
    }
  }

  /**
   * Validates the operands and computes the exponential moving average.
   *
   * @param values the evaluated operands: observations, window and optionally alpha
   * @return a {@code List<Number>} holding the seed mean followed by one smoothed value for
   *     every observation after the first {@code window} observations
   * @throws IOException if the operand count, an operand type, the window size or alpha is invalid
   */
  @Override
  public Object doWork(Object... values) throws IOException {
    if (!(2 == values.length || values.length == 3)) {
      throw new IOException(String.format(Locale.ROOT, "%s(...) only works with 2 or 3 values but %d were provided", constructingFactory.getFunctionName(getClass()), values.length));
    }

    // Types are checked BEFORE casting so that a bad operand produces the descriptive
    // IOException instead of a ClassCastException. The same checks apply whether or not
    // alpha was supplied.
    if (!(values[0] instanceof List<?>)) {
      throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the first value, expecting a List", toExpression(constructingFactory), describeType(values[0])));
    }
    List<?> observations = (List<?>) values[0];
    if (observations.isEmpty()) {
      throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found list size of %s for the first value, expecting a List of size > 0.", toExpression(constructingFactory), observations.size()));
    }

    if (!(values[1] instanceof Number)) {
      throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the second value, expecting a Number", toExpression(constructingFactory), describeType(values[1])));
    }
    Number window = (Number) values[1];
    int windowSize = window.intValue();
    // A window below 1 cannot seed the average: 0 is rejected by DescriptiveStatistics with
    // an unchecked exception and -1 means "infinite window" there, so reject both up front.
    if (windowSize < 1) {
      throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found a window size of %s for the second value, expecting a window value of at least 1", toExpression(constructingFactory), windowSize));
    }
    if (window.doubleValue() > observations.size()) {
      throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found a window size of %s for the second value, the first value has a List size of %s, expecting a window value smaller or equal to the List size", toExpression(constructingFactory), window.intValue(), observations.size()));
    }

    double alpha;
    if (3 == values.length) {
      if (!(values[2] instanceof Number)) {
        throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the third value, expecting a Number", toExpression(constructingFactory), describeType(values[2])));
      }
      alpha = ((Number) values[2]).doubleValue();
      // Written as a negated conjunction so that NaN is rejected as well.
      if (!(alpha >= 0 && alpha <= 1.0)) {
        throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - out of range, found %s for the third value, expecting a range between 0 and 1.0", toExpression(constructingFactory), alpha));
      }
    } else {
      alpha = 2.0 / (window.doubleValue() + 1.0);
    }

    // Seed the series with the mean of the first windowSize observations.
    DescriptiveStatistics seedWindow = new DescriptiveStatistics(windowSize);
    for (Object value : observations.subList(0, windowSize)) {
      seedWindow.addValue(((Number) value).doubleValue());
    }
    double lastValue = seedWindow.getMean();

    List<Number> sequence = new ArrayList<>(observations.size() - windowSize + 1);
    sequence.add(Double.valueOf(lastValue));

    // Smooth every observation that follows the seed window.
    for (Object value : observations.subList(windowSize, observations.size())) {
      lastValue = alpha * (((Number) value).doubleValue() - lastValue) + lastValue;
      sequence.add(Double.valueOf(lastValue));
    }

    return sequence;
  }

  /** Returns the simple class name of {@code value}, or {@code "null"} when it is null. */
  private static String describeType(Object value) {
    return value == null ? "null" : value.getClass().getSimpleName();
  }
}

View File

@ -5896,6 +5896,23 @@ public class StreamExpressionTest extends SolrCloudTestCase {
assertEquals(asort.get(9).intValue(), 29);
}
/**
 * Sends {@code binomialCoefficient(8,3)} through the /stream handler and checks that the
 * single returned tuple carries the value 56 (8 choose 3).
 */
@Test
public void testBinomialCoefficient() throws Exception {
  String cexpr = "binomialCoefficient(8,3)";
  ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
  paramsLoc.set("expr", cexpr);
  paramsLoc.set("qt", "/stream");
  String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
  TupleStream solrStream = new SolrStream(url, paramsLoc);
  StreamContext context = new StreamContext();
  solrStream.setStreamContext(context);
  List<Tuple> tuples = getTuples(solrStream);
  // assertEquals (expected first) reports the actual size on failure, unlike assertTrue(a == b).
  assertEquals(1, tuples.size());
  Tuple tuple = tuples.get(0);
  long binomialCoefficient = (long) tuple.get("return-value");
  assertEquals(56L, binomialCoefficient);
}
@Test
public void testAscend() throws Exception {
String cexpr = "asc(array(11.5, 12.3, 4, 3, 1, 0))";
@ -6518,6 +6535,45 @@ public class StreamExpressionTest extends SolrCloudTestCase {
assertTrue(dotProduct.doubleValue() == 182);
}
/**
 * Sends an {@code expMovingAvg} expression over 30 observations with a window of 10 through
 * the /stream handler and compares the 21 returned values (the seed mean plus one value per
 * remaining observation) against reference values, to within 0.009.
 */
@Test
public void testExponentialMovingAverage() throws Exception {
  String cexpr = "expMovingAvg(array(22.27, 22.19, 22.08, 22.17, 22.18, 22.13, 22.23, 22.43, 22.24, 22.29, " +
      "22.15, 22.39, 22.38, 22.61, 23.36, 24.05, 23.75, 23.83, 23.95, 23.63, 23.82, 23.87, 23.65, 23.19,"+
      "23.10, 23.33, 22.68, 23.10, 22.40, 22.17), 10)";
  ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
  paramsLoc.set("expr", cexpr);
  paramsLoc.set("qt", "/stream");
  String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
  TupleStream solrStream = new SolrStream(url, paramsLoc);
  StreamContext context = new StreamContext();
  solrStream.setStreamContext(context);
  List<Tuple> tuples = getTuples(solrStream);
  assertEquals(1, tuples.size());

  @SuppressWarnings("unchecked") // the evaluator returns a list of numbers for "return-value"
  List<Number> out = (List<Number>) tuples.get(0).get("return-value");

  double[] expected = {
      22.22, 22.21, 22.24, 22.27, 22.33, 22.52, 22.80,
      22.97, 23.13, 23.28, 23.34, 23.43, 23.51, 23.54,
      23.47, 23.40, 23.39, 23.26, 23.23, 23.08, 22.92
  };
  assertEquals(expected.length, out.size());
  for (int i = 0; i < expected.length; i++) {
    // Expected value goes first so a failure reports expected/actual the right way round.
    assertEquals("expMovingAvg value at index " + i, expected[i], out.get(i).doubleValue(), 0.009);
  }
}
@Test
public void testResiduals() throws Exception {
@ -8537,5 +8593,4 @@ public class StreamExpressionTest extends SolrCloudTestCase {
stream.close();
}
}
}