SOLR-11593: Add support for covariance matrices to the cov Stream Evaluator

This commit is contained in:
Joel Bernstein 2017-11-01 11:26:28 -04:00
parent 44b7bc0177
commit 6d5a7920ae
2 changed files with 54 additions and 21 deletions

View File

@ -19,13 +19,13 @@ package org.apache.solr.client.solrj.io.eval;
import java.io.IOException; import java.io.IOException;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.util.List; import java.util.List;
import java.util.Locale;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.stat.correlation.Covariance; import org.apache.commons.math3.stat.correlation.Covariance;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
public class CovarianceEvaluator extends RecursiveNumericEvaluator implements TwoValueWorker { public class CovarianceEvaluator extends RecursiveObjectEvaluator implements ManyValueWorker {
protected static final long serialVersionUID = 1L; protected static final long serialVersionUID = 1L;
public CovarianceEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{ public CovarianceEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{
@ -33,25 +33,26 @@ public class CovarianceEvaluator extends RecursiveNumericEvaluator implements Tw
} }
/*
 * doWork of CovarianceEvaluator, shown as a side-by-side diff: where a line carries two
 * statements, the text on the left appears to be the pre-commit two-argument version and
 * the text on the right the post-commit varargs version (NOTE(review): inferred from the
 * two signatures on the declaration line - confirm against the repository).
 *
 * Left (removed) version: rejects a null first/second value and any value that is not a
 * List by throwing IOException, then converts both lists to double[] through
 * BigDecimal.doubleValue() and returns Covariance.covariance(first, second).
 * NOTE(review): the "second value" type-error message formats first.getClass(), not
 * second.getClass() - looks like a copy/paste slip in the removed code.
 *
 * Right (added) version:
 *   - exactly two values: casts each to List, converts the elements to double[] through
 *     BigDecimal.doubleValue() and returns Covariance.covariance(...) of the two arrays;
 *   - exactly one value: casts it to Matrix, builds Covariance(matrix.getData(), true),
 *     and returns a new Matrix wrapping getCovarianceMatrix().getData();
 *   - any other count: throws IOException with a fixed message.
 * NOTE(review): the added version has no null or instanceof checks before the casts, so
 * malformed input would presumably surface as NullPointerException/ClassCastException
 * instead of the descriptive IOException the removed version produced - confirm intended.
 */
@Override @Override
public Object doWork(Object first, Object second) throws IOException{ public Object doWork(Object ... values) throws IOException{
if(null == first){
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the first value",toExpression(constructingFactory)));
}
if(null == second){
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the second value",toExpression(constructingFactory)));
}
if(!(first instanceof List<?>)){
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for the first value, expecting a list of numbers",toExpression(constructingFactory), first.getClass().getSimpleName()));
}
if(!(second instanceof List<?>)){
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for the second value, expecting a list of numbers",toExpression(constructingFactory), first.getClass().getSimpleName()));
}
Covariance covariance = new Covariance(); if(values.length == 2) {
Object first = values[0];
return covariance.covariance( Object second = values[1];
((List)first).stream().mapToDouble(value -> ((BigDecimal)value).doubleValue()).toArray(), Covariance covariance = new Covariance();
((List)second).stream().mapToDouble(value -> ((BigDecimal)value).doubleValue()).toArray()
); return covariance.covariance(
((List) first).stream().mapToDouble(value -> ((BigDecimal) value).doubleValue()).toArray(),
((List) second).stream().mapToDouble(value -> ((BigDecimal) value).doubleValue()).toArray()
);
} else if(values.length == 1) {
Matrix matrix = (Matrix) values[0];
double[][] data = matrix.getData();
Covariance covariance = new Covariance(data, true);
RealMatrix coMatrix = covariance.getCovarianceMatrix();
double[][] coData = coMatrix.getData();
return new Matrix(coData);
} else {
throw new IOException("The cov function expects either two numeric arrays or a matrix as parameters.");
}
} }
} }

View File

@ -7056,6 +7056,38 @@ public class StreamExpressionTest extends SolrCloudTestCase {
assertEquals(maxa.doubleValue(), 30, .5); assertEquals(maxa.doubleValue(), 30, .5);
} }
/**
 * Verifies that {@code cov} accepts a single matrix argument and returns the full
 * covariance matrix.
 *
 * <p>The expression builds a matrix from a=(1,2,3), b=(2,4,6), c=(4,8,12), transposes it,
 * and passes it to {@code cov}. Since b = 2a and c = 4a, the expected 3x3 result is
 * var(a) = 1 scaled by the pairwise factors: [[1,2,4],[2,4,8],[4,8,16]].
 *
 * @throws Exception if the streaming request fails
 */
@Test
public void testCovMatrix() throws Exception {
  String cexpr = "let(a=array(1,2,3), b=array(2,4,6), c=array(4, 8, 12), d=transpose(matrix(a, b, c)), f=cov(d))";
  ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
  paramsLoc.set("expr", cexpr);
  paramsLoc.set("qt", "/stream");
  String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
  TupleStream solrStream = new SolrStream(url, paramsLoc);
  StreamContext context = new StreamContext();
  solrStream.setStreamContext(context);
  List<Tuple> tuples = getTuples(solrStream);
  // assertEquals (expected first) reports the actual size on failure; assertTrue(size == 1) would not.
  assertEquals(1, tuples.size());

  // The tuple field is untyped; the evaluator is expected to serialize the matrix as a list of numeric rows.
  @SuppressWarnings("unchecked")
  List<List<Number>> cm = (List<List<Number>>)tuples.get(0).get("f");

  double[][] expected = {
      {1, 2, 4},
      {2, 4, 8},
      {4, 8, 16}
  };
  assertEquals(expected.length, cm.size());
  for (int r = 0; r < expected.length; r++) {
    List<Number> row = cm.get(r);
    assertEquals(expected[r].length, row.size());
    for (int c = 0; c < expected[r].length; c++) {
      // Compare as doubles with a small tolerance: truncating via longValue() would let
      // a wrong fractional result (e.g. 2.5) pass as 2.
      assertEquals("cov[" + r + "][" + c + "]", expected[r][c], row.get(c).doubleValue(), 1e-6);
    }
  }
}