mirror of https://github.com/apache/lucene.git
SOLR-13589: Allow zplot to visualize clusters and convex hulls
This commit is contained in:
parent
fc15cd79f7
commit
c6cc2fd9fd
|
@ -290,6 +290,7 @@ public class Lang {
|
|||
.withFunctionName("notNull", NotNullEvaluator.class)
|
||||
.withFunctionName("isNull", IsNullEvaluator.class)
|
||||
.withFunctionName("matches", MatchesEvaluator.class)
|
||||
.withFunctionName("projectToBorder", ProjectToBorderEvaluator.class)
|
||||
|
||||
// Boolean Stream Evaluators
|
||||
|
||||
|
|
|
@ -78,7 +78,11 @@ public class KmeansEvaluator extends RecursiveObjectEvaluator implements TwoValu
|
|||
|
||||
for(int i=0; i<data.length; i++) {
|
||||
double[] vec = data[i];
|
||||
points.add(new ClusterPoint(ids.get(i), vec));
|
||||
if(ids != null) {
|
||||
points.add(new ClusterPoint(ids.get(i), vec));
|
||||
} else {
|
||||
points.add(new ClusterPoint(Integer.toString(i), vec));
|
||||
}
|
||||
}
|
||||
|
||||
Map fields = new HashMap();
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.client.solrj.io.eval;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.commons.math3.geometry.euclidean.twod.Euclidean2D;
|
||||
import org.apache.commons.math3.geometry.euclidean.twod.hull.ConvexHull2D;
|
||||
import org.apache.commons.math3.geometry.euclidean.twod.Vector2D;
|
||||
import org.apache.commons.math3.geometry.partitioning.BoundaryProjection;
|
||||
import org.apache.commons.math3.geometry.partitioning.Region;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
|
||||
|
||||
public class ProjectToBorderEvaluator extends RecursiveObjectEvaluator implements TwoValueWorker {
|
||||
private static final long serialVersionUID = 1;
|
||||
|
||||
public ProjectToBorderEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
|
||||
super(expression, factory);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object doWork(Object value1, Object value2) throws IOException {
|
||||
if(!(value1 instanceof ConvexHull2D)){
|
||||
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for value, expecting a ConvexHull2D",toExpression(constructingFactory), value1.getClass().getSimpleName()));
|
||||
}
|
||||
|
||||
if(!(value2 instanceof Matrix)){
|
||||
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for value, expecting a Matrix",toExpression(constructingFactory), value2.getClass().getSimpleName()));
|
||||
}
|
||||
|
||||
ConvexHull2D convexHull2D = (ConvexHull2D)value1;
|
||||
Matrix matrix = (Matrix)value2;
|
||||
double[][] data = matrix.getData();
|
||||
Region<Euclidean2D> region = convexHull2D.createRegion();
|
||||
double[][] borderPoints = new double[data.length][2];
|
||||
int i = 0;
|
||||
for(double[] row : data) {
|
||||
BoundaryProjection<Euclidean2D> boundaryProjection = region.projectToBoundary(new Vector2D(row));
|
||||
Vector2D point = (Vector2D)boundaryProjection.getProjected();
|
||||
borderPoints[i][0] = point.getX();
|
||||
borderPoints[i][1] = point.getY();
|
||||
i++;
|
||||
}
|
||||
|
||||
return new Matrix(borderPoints);
|
||||
|
||||
}
|
||||
}
|
|
@ -27,12 +27,15 @@ import java.util.Set;
|
|||
|
||||
import org.apache.commons.math3.distribution.IntegerDistribution;
|
||||
import org.apache.commons.math3.distribution.RealDistribution;
|
||||
import org.apache.commons.math3.geometry.Point;
|
||||
import org.apache.commons.math3.ml.clustering.CentroidCluster;
|
||||
import org.apache.commons.math3.random.EmpiricalDistribution;
|
||||
import org.apache.commons.math3.stat.Frequency;
|
||||
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
|
||||
import org.apache.commons.math3.util.Precision;
|
||||
import org.apache.solr.client.solrj.io.Tuple;
|
||||
import org.apache.solr.client.solrj.io.comp.StreamComparator;
|
||||
import org.apache.solr.client.solrj.io.eval.KmeansEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.StreamEvaluator;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.Explanation;
|
||||
import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
|
||||
|
@ -126,6 +129,7 @@ public class ZplotStream extends TupleStream implements Expressible {
|
|||
int columns = 0;
|
||||
boolean table = false;
|
||||
boolean distribution = false;
|
||||
boolean clusters = false;
|
||||
for(Map.Entry<String, Object> entry : entries) {
|
||||
++columns;
|
||||
|
||||
|
@ -134,6 +138,8 @@ public class ZplotStream extends TupleStream implements Expressible {
|
|||
table = true;
|
||||
} else if(name.equals("dist")) {
|
||||
distribution = true;
|
||||
} else if(name.equals("clusters")) {
|
||||
clusters = true;
|
||||
}
|
||||
|
||||
Object o = entry.getValue();
|
||||
|
@ -181,7 +187,7 @@ public class ZplotStream extends TupleStream implements Expressible {
|
|||
//Load the values into tuples
|
||||
|
||||
List<Tuple> outTuples = new ArrayList();
|
||||
if(!table && !distribution) {
|
||||
if(!table && !distribution && !clusters) {
|
||||
//Handle the vectors
|
||||
for (int i = 0; i < numTuples; i++) {
|
||||
Tuple tuple = new Tuple(new HashMap());
|
||||
|
@ -194,13 +200,28 @@ public class ZplotStream extends TupleStream implements Expressible {
|
|||
}
|
||||
|
||||
//Generate the x axis if the tuples contain y and not x
|
||||
if(outTuples.get(0).fields.containsKey("y") && !outTuples.get(0).fields.containsKey("x")) {
|
||||
if (outTuples.get(0).fields.containsKey("y") && !outTuples.get(0).fields.containsKey("x")) {
|
||||
int x = 0;
|
||||
for(Tuple tuple : outTuples) {
|
||||
for (Tuple tuple : outTuples) {
|
||||
tuple.put("x", x++);
|
||||
}
|
||||
}
|
||||
|
||||
} else if(clusters) {
|
||||
Object o = evaluated.get("clusters");
|
||||
KmeansEvaluator.ClusterTuple ct = (KmeansEvaluator.ClusterTuple)o;
|
||||
List<CentroidCluster<KmeansEvaluator.ClusterPoint>> cs = ct.getClusters();
|
||||
int clusterNum = 0;
|
||||
for(CentroidCluster<KmeansEvaluator.ClusterPoint> c : cs) {
|
||||
clusterNum++;
|
||||
List<KmeansEvaluator.ClusterPoint> points = c.getPoints();
|
||||
for(KmeansEvaluator.ClusterPoint p : points) {
|
||||
Tuple tuple = new Tuple(new HashMap());
|
||||
tuple.put("x", p.getPoint()[0]);
|
||||
tuple.put("y", p.getPoint()[1]);
|
||||
tuple.put("cluster", "cluster"+clusterNum);
|
||||
outTuples.add(tuple);
|
||||
}
|
||||
}
|
||||
} else if(distribution) {
|
||||
Object o = evaluated.get("dist");
|
||||
if(o instanceof RealDistribution) {
|
||||
|
|
|
@ -76,7 +76,7 @@ public class TestLang extends SolrTestCase {
|
|||
"getAmplitude", "getPhase", "getAngularFrequency", "enclosingDisk", "getCenter", "getRadius",
|
||||
"getSupportPoints", "pairSort", "log10", "plist", "recip", "pivot", "ltrim", "rtrim", "export",
|
||||
"zplot", "natural", "repeat", "movingMAD", "hashRollup", "noop", "var", "stddev", "recNum", "isNull",
|
||||
"notNull", "matches"};
|
||||
"notNull", "matches", "projectToBorder"};
|
||||
|
||||
@Test
|
||||
public void testLang() {
|
||||
|
|
|
@ -407,7 +407,8 @@ public class MathExpressionTest extends SolrCloudTestCase {
|
|||
" e=getVertices(d)," +
|
||||
" f=getArea(d)," +
|
||||
" g=getBoundarySize(d)," +
|
||||
" h=getBaryCenter(d))";
|
||||
" h=getBaryCenter(d)," +
|
||||
" i=projectToBorder(d, matrix(array(99.11076410926444, 109.5441846957560))))";
|
||||
ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
|
||||
paramsLoc.set("expr", expr);
|
||||
paramsLoc.set("qt", "/stream");
|
||||
|
@ -466,6 +467,11 @@ public class MathExpressionTest extends SolrCloudTestCase {
|
|||
assertEquals(baryCenter.get(0).doubleValue(), 101.3021125450865, 0.0);
|
||||
assertEquals(baryCenter.get(1).doubleValue(), 100.07343616615786, 0.0);
|
||||
|
||||
List<List<Number>> borderPoints = (List<List<Number>>)tuples.get(0).get("i");
|
||||
assertEquals(borderPoints.get(0).get(0).doubleValue(), 100.31316833934775, 0);
|
||||
assertEquals(borderPoints.get(0).get(1).doubleValue(), 115.6639686234851, 0);
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue