SOLR-11907: Add convexHull and associated geometric Stream Evaluators

This commit is contained in:
Joel Bernstein 2018-09-30 20:56:03 -04:00
parent 964cc88cee
commit 3fc58f4a98
8 changed files with 327 additions and 1 deletions

View File

@ -256,6 +256,11 @@ public class Lang {
.withFunctionName("listCache", ListCacheEvaluator.class)
.withFunctionName("zscores", NormalizeEvaluator.class)
.withFunctionName("latlonVectors", LatLonVectorsEvaluator.class)
.withFunctionName("convexHull", ConvexHullEvaluator.class)
.withFunctionName("getVertices", GetVerticesEvaluator.class)
.withFunctionName("getBaryCenter", GetBaryCenterEvaluator.class)
.withFunctionName("getArea", GetAreaEvaluator.class)
.withFunctionName("getBoundarySize", GetBoundarySizeEvaluator.class)
// Boolean Stream Evaluators

View File

@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.eval;
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import org.apache.commons.math3.geometry.euclidean.twod.Vector2D;
import org.apache.commons.math3.geometry.euclidean.twod.hull.ConvexHull2D;
import org.apache.commons.math3.geometry.euclidean.twod.hull.MonotoneChain;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
/**
 * Stream evaluator for the {@code convexHull} function.
 *
 * <p>Takes a {@link Matrix} whose rows are 2D points (exactly two columns per row) and
 * returns the {@link ConvexHull2D} produced by commons-math's {@link MonotoneChain}
 * hull generator. Only the first argument is consulted.
 */
public class ConvexHullEvaluator extends RecursiveObjectEvaluator implements ManyValueWorker {
  protected static final long serialVersionUID = 1L;

  /** Single user-facing message used for every "wrong input shape/type" failure. */
  private static final String USAGE_ERROR = "The convexHull function operates on a matrix of 2D vectors";

  public ConvexHullEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{
    super(expression, factory);
  }

  /**
   * Evaluates the function on the already-evaluated parameters.
   *
   * @param objects evaluated parameters; the first must be a {@link Matrix}
   * @return the {@link ConvexHull2D} of the points held in the matrix
   * @throws IOException if no parameter was supplied or the first one is not a matrix of 2D vectors
   */
  @Override
  public Object doWork(Object... objects) throws IOException{
    // Guard the index access so a call with no parameters reports a usage error
    // instead of an ArrayIndexOutOfBoundsException.
    if(objects == null || objects.length == 0 || !(objects[0] instanceof Matrix)) {
      throw new IOException(USAGE_ERROR);
    }
    return getConvexHull((Matrix)objects[0]);
  }

  /**
   * Builds the convex hull of the points stored row-wise in {@code matrix}.
   *
   * @param matrix matrix in which every row is an {@code [x, y]} pair
   * @return the convex hull of those points
   * @throws IOException if the matrix has no rows or any row does not have exactly two columns
   */
  public static ConvexHull2D getConvexHull(Matrix matrix) throws IOException {
    double[][] data = matrix.getData();
    // An empty matrix has no data[0]; fail with the usage message rather than an index error.
    if(data == null || data.length == 0) {
      throw new IOException(USAGE_ERROR);
    }

    List<Vector2D> points = new ArrayList<>(data.length);
    for(double[] row : data) {
      // Validate every row, not only the first, so ragged input cannot be silently
      // truncated (too long) or blow up with an unchecked exception (too short).
      if(row == null || row.length != 2) {
        throw new IOException(USAGE_ERROR);
      }
      points.add(new Vector2D(row[0], row[1]));
    }

    return new MonotoneChain().generate(points);
  }
}

View File

@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.eval;
import java.io.IOException;
import java.util.Locale;
import org.apache.commons.math3.geometry.euclidean.twod.hull.ConvexHull2D;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
/**
 * Stream evaluator for the {@code getArea} function.
 *
 * <p>Accepts a {@link ConvexHull2D} and returns the size of the region enclosed by it,
 * as reported by {@code createRegion().getSize()} (the area, for a 2D region).
 */
public class GetAreaEvaluator extends RecursiveObjectEvaluator implements OneValueWorker {
  private static final long serialVersionUID = 1;

  public GetAreaEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
    super(expression, factory);
  }

  /**
   * @param value the evaluated parameter; must be a {@link ConvexHull2D}
   * @return the size of the region enclosed by the hull
   * @throws IOException if {@code value} is null or not a {@link ConvexHull2D}
   */
  @Override
  public Object doWork(Object value) throws IOException {
    if(!(value instanceof ConvexHull2D)){
      // null fails instanceof too; describe it explicitly instead of hitting an NPE on getClass().
      String foundType = value == null ? "null" : value.getClass().getSimpleName();
      throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for value, expecting a convex hull",toExpression(constructingFactory), foundType));
    }

    // NOTE(review): per the commons-math docs createRegion() may throw an unchecked
    // InsufficientDataException for hulls with fewer than 3 vertices - confirm whether
    // callers want that surfaced as an IOException.
    ConvexHull2D convexHull2D = (ConvexHull2D)value;
    return convexHull2D.createRegion().getSize();
  }
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.eval;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Locale;
import org.apache.commons.math3.geometry.euclidean.twod.hull.ConvexHull2D;
import org.apache.commons.math3.geometry.euclidean.twod.Vector2D;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import java.util.List;
/**
 * Stream evaluator for the {@code getBaryCenter} function.
 *
 * <p>Accepts a {@link ConvexHull2D} and returns the barycenter of the region enclosed by it
 * as a two-element list {@code [x, y]}.
 */
public class GetBaryCenterEvaluator extends RecursiveObjectEvaluator implements OneValueWorker {
  private static final long serialVersionUID = 1;

  public GetBaryCenterEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
    super(expression, factory);
  }

  /**
   * @param value the evaluated parameter; must be a {@link ConvexHull2D}
   * @return a {@code List<Number>} holding the barycenter's x then y coordinate
   * @throws IOException if {@code value} is null or not a {@link ConvexHull2D}
   */
  @Override
  public Object doWork(Object value) throws IOException {
    if(!(value instanceof ConvexHull2D)){
      // null fails instanceof too; describe it explicitly instead of hitting an NPE on getClass().
      String foundType = value == null ? "null" : value.getClass().getSimpleName();
      throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for value, expecting a convex hull",toExpression(constructingFactory), foundType));
    }

    // NOTE(review): per the commons-math docs createRegion() may throw an unchecked
    // InsufficientDataException for hulls with fewer than 3 vertices - confirm whether
    // callers want that surfaced as an IOException.
    ConvexHull2D convexHull2D = (ConvexHull2D)value;
    Vector2D center = (Vector2D)convexHull2D.createRegion().getBarycenter();

    List<Number> vec = new ArrayList<>(2);
    vec.add(center.getX());
    vec.add(center.getY());
    return vec;
  }
}

View File

@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.eval;
import java.io.IOException;
import java.util.Locale;
import org.apache.commons.math3.geometry.euclidean.twod.hull.ConvexHull2D;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
/**
 * Stream evaluator for the {@code getBoundarySize} function.
 *
 * <p>Accepts a {@link ConvexHull2D} and returns the size of the boundary of the region
 * enclosed by it, as reported by {@code createRegion().getBoundarySize()}.
 */
public class GetBoundarySizeEvaluator extends RecursiveObjectEvaluator implements OneValueWorker {
  private static final long serialVersionUID = 1;

  public GetBoundarySizeEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
    super(expression, factory);
  }

  /**
   * @param value the evaluated parameter; must be a {@link ConvexHull2D}
   * @return the boundary size of the region enclosed by the hull
   * @throws IOException if {@code value} is null or not a {@link ConvexHull2D}
   */
  @Override
  public Object doWork(Object value) throws IOException {
    if(!(value instanceof ConvexHull2D)){
      // null fails instanceof too; describe it explicitly instead of hitting an NPE on getClass().
      String foundType = value == null ? "null" : value.getClass().getSimpleName();
      throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for value, expecting a convex hull",toExpression(constructingFactory), foundType));
    }

    // NOTE(review): per the commons-math docs createRegion() may throw an unchecked
    // InsufficientDataException for hulls with fewer than 3 vertices - confirm whether
    // callers want that surfaced as an IOException.
    ConvexHull2D convexHull2D = (ConvexHull2D)value;
    return convexHull2D.createRegion().getBoundarySize();
  }
}

View File

@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.eval;
import java.io.IOException;
import java.util.Locale;
import org.apache.commons.math3.geometry.euclidean.twod.hull.ConvexHull2D;
import org.apache.commons.math3.geometry.euclidean.twod.Vector2D;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
/**
 * Stream evaluator for the {@code getVertices} function.
 *
 * <p>Accepts a {@link ConvexHull2D} and returns its vertices as a {@link Matrix} with one
 * row per vertex and two columns ({@code x}, {@code y}), in the order returned by
 * {@link ConvexHull2D#getVertices()}.
 */
public class GetVerticesEvaluator extends RecursiveObjectEvaluator implements OneValueWorker {
  private static final long serialVersionUID = 1;

  public GetVerticesEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
    super(expression, factory);
  }

  /**
   * @param value the evaluated parameter; must be a {@link ConvexHull2D}
   * @return a {@link Matrix} whose rows are the hull vertices as {@code [x, y]}
   * @throws IOException if {@code value} is null or not a {@link ConvexHull2D}
   */
  @Override
  public Object doWork(Object value) throws IOException {
    if(!(value instanceof ConvexHull2D)){
      // null fails instanceof too; describe it explicitly instead of hitting an NPE on getClass().
      String foundType = value == null ? "null" : value.getClass().getSimpleName();
      throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for value, expecting a convex hull",toExpression(constructingFactory), foundType));
    }

    Vector2D[] vertices = ((ConvexHull2D)value).getVertices();
    double[][] data = new double[vertices.length][2];
    for(int i=0; i<vertices.length; i++) {
      data[i][0] = vertices[i].getX();
      data[i][1] = vertices[i].getY();
    }
    return new Matrix(data);
  }
}

View File

@ -70,7 +70,8 @@ public class TestLang extends LuceneTestCase {
"mod", "ceil", "floor", "sin", "asin", "sinh", "cos", "acos", "cosh", "tan", "atan", "tanh", "round", "sqrt",
"cbrt", "coalesce", "uuid", "if", "convert", "valueAt", "memset", "fft", "ifft", "euclidean","manhattan",
"earthMovers", "canberra", "chebyshev", "ones", "zeros", "setValue", "getValue", "knnRegress", "gaussfit",
"outliers", "stream", "getCache", "putCache", "listCache", "removeCache", "zscores", "latlonVectors"};
"outliers", "stream", "getCache", "putCache", "listCache", "removeCache", "zscores", "latlonVectors",
"convexHull", "getVertices", "getBaryCenter", "getArea", "getBoundarySize"};
@Test
public void testLang() {

View File

@ -401,6 +401,77 @@ public class MathExpressionTest extends SolrCloudTestCase {
}
}
@Test
public void testConvexHull() throws Exception {
  // Builds a 25x2 matrix of points, computes its convex hull and reads back the
  // hull's vertices (e), area (f), boundary size (g) and barycenter (h).
  String expr = "let(echo=true," +
      " x=array(96.42894739701268, 99.11076410926444, 95.71563821370013,101.4356840561301, 96.17912865782684, 113.430677406492, 109.5927785287056, 87.26561260238425, 103.3122002816537, 100.4959815617706, 92.78972440872515, 92.98815024042877, 89.1448359089767, 104.9410622701036, 106.5546761317927, 102.0132643274808, 119.6726096270366, 97.61388415294184, 106.7928221374049, 94.31369945729962, 87.37098859879977, 82.8015657665458, 88.84342877874248, 94.58797342988339, 92.38720473619748)," +
      " y=array(97.43395922838836, 109.5441846957560, 78.82698890096127, 96.67181538737611,95.52423701473863, 85.3391529394878, 87.01956497912255, 111.5289690656729,86.41034184809114, 84.11696923489203, 109.3874354244069, 102.3391063812790,109.0604436531823,102.7957014900897,114.4376483055848,107.4387578165579,106.2490201384653,103.4490197583837,93.8201540211101,101.6060721649409, 115.3512636715722,119.1046170610335,99.74910277836263,104.2116724112481, 86.02222520549304)," +
      " c=transpose(matrix(x, y))," +
      " d=convexHull(c)," +
      " e=getVertices(d)," +
      " f=getArea(d)," +
      " g=getBoundarySize(d)," +
      " h=getBaryCenter(d))";
  ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
  paramsLoc.set("expr", expr);
  paramsLoc.set("qt", "/stream");
  String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
  TupleStream solrStream = new SolrStream(url, paramsLoc);
  StreamContext context = new StreamContext();
  solrStream.setStreamContext(context);
  List<Tuple> tuples = getTuples(solrStream);
  assertEquals(1, tuples.size());

  // Expected hull vertices, in the order getVertices returns them. Each one is
  // an input point, so an exact (delta 0.0) comparison is intentional.
  double[][] expectedVertices = {
      {82.8015657665458, 119.1046170610335},
      {92.38720473619748, 86.02222520549304},
      {95.71563821370013, 78.82698890096127},
      {113.430677406492, 85.3391529394878},
      {119.6726096270366, 106.2490201384653},
      {106.5546761317927, 114.4376483055848}
  };

  // The stream response carries untyped values; the cast reflects the shape produced by getVertices.
  @SuppressWarnings("unchecked")
  List<List<Number>> points = (List<List<Number>>)tuples.get(0).get("e");
  assertEquals(expectedVertices.length, points.size());
  for (int i = 0; i < expectedVertices.length; i++) {
    List<Number> point = points.get(i);
    assertEquals("vertex " + i + " dimension", 2, point.size());
    assertEquals("vertex " + i + " x", expectedVertices[i][0], point.get(0).doubleValue(), 0.0);
    assertEquals("vertex " + i + " y", expectedVertices[i][1], point.get(1).doubleValue(), 0.0);
  }

  double area = tuples.get(0).getDouble("f");
  assertEquals(911.6283603859929, area, 0.0);

  double boundarySize = tuples.get(0).getDouble("g");
  assertEquals(122.73784789223708, boundarySize, 0.0);

  // The stream response carries untyped values; the cast reflects the shape produced by getBaryCenter.
  @SuppressWarnings("unchecked")
  List<Number> baryCenter = (List<Number>)tuples.get(0).get("h");
  assertEquals(2, baryCenter.size());
  assertEquals(101.3021125450865, baryCenter.get(0).doubleValue(), 0.0);
  assertEquals(100.07343616615786, baryCenter.get(1).doubleValue(), 0.0);
}
@Test
public void testCumulativeProbability() throws Exception {
String expr = "cumulativeProbability(normalDistribution(500, 40), 500)";