MATH-1516: Require that evaluators implement "ClusterEvaluator".

This commit is contained in:
Gilles 2020-03-10 03:31:51 +01:00
parent 4eb5f9f8a6
commit 68f7eae071
4 changed files with 22 additions and 10 deletions

View File

@ -54,6 +54,9 @@ If the output is not quite correct, check for invisible trailing spaces!
</release>
<release version="4.0" date="XXXX-XX-XX" description="">
<action dev="erans" type="add" issue="MATH-1519" due-to="Chentao">
Add "Calinski-Harabasz" clustering evaluator.
</action>
<action dev="erans" type="update" issue="MATH-1523">
Abstract class "ClusterEvaluator" replaced by an interface.
</action>

View File

@ -19,7 +19,7 @@ package org.apache.commons.math4.ml.clustering.evaluation;
import org.apache.commons.math4.exception.InsufficientDataException;
import org.apache.commons.math4.ml.clustering.Cluster;
import org.apache.commons.math4.ml.clustering.ClusterRanking;
import org.apache.commons.math4.ml.clustering.ClusterEvaluator;
import org.apache.commons.math4.ml.clustering.Clusterable;
import org.apache.commons.math4.util.MathArrays;
@ -38,12 +38,10 @@ import java.util.List;
* @see <a href="https://www.tandfonline.com/doi/abs/10.1080/03610927408827101">A dendrite method for cluster
* analysis</a>
*/
public class CalinskiHarabasz<T extends Clusterable> implements ClusterRanking<T> {
/**
* {@inheritDoc}
*/
public class CalinskiHarabasz<T extends Clusterable> implements ClusterEvaluator<T> {
/** {@inheritDoc} */
@Override
public double compute(List<? extends Cluster<T>> clusters) {
public double score(List<? extends Cluster<T>> clusters) {
final int dimension = dimensionOfClusters(clusters);
final double[] centroid = meanOfClusters(clusters, dimension);
@ -68,6 +66,13 @@ public class CalinskiHarabasz<T extends Clusterable> implements ClusterRanking<T
(intraDistanceProduct * (clusterCount - 1)));
}
/**
 * {@inheritDoc}
 *
 * <p>For this evaluator a larger score wins: the result is {@code true}
 * only when {@code a} is strictly greater than {@code b}. Equal scores,
 * or a comparison in which either value is NaN, give {@code false}.</p>
 */
@Override
public boolean isBetterScore(double a,
                             double b) {
    // Mirrored form of "a > b"; both forms are false whenever a NaN is involved.
    final boolean firstIsLarger = b < a;
    return firstIsLarger;
}
/**
* Calculate the covariance of two double arrays.
* <pre>

View File

@ -16,5 +16,9 @@
*/
/**
* Cluster evaluation methods.
*
* All evaluators should implement the
* {@link org.apache.commons.math4.ml.clustering.ClusterEvaluator}
* interface.
*/
package org.apache.commons.math4.ml.clustering.evaluation;

View File

@ -18,7 +18,7 @@
package org.apache.commons.math4.ml.clustering.evaluation;
import org.apache.commons.math4.ml.clustering.CentroidCluster;
import org.apache.commons.math4.ml.clustering.ClusterRanking;
import org.apache.commons.math4.ml.clustering.ClusterEvaluator;
import org.apache.commons.math4.ml.clustering.DoublePoint;
import org.apache.commons.math4.ml.clustering.KMeansPlusPlusClusterer;
import org.apache.commons.math4.ml.distance.DistanceMeasure;
@ -33,7 +33,7 @@ import java.util.ArrayList;
import java.util.List;
public class CalinskiHarabaszTest {
private ClusterRanking<DoublePoint> evaluator;
private ClusterEvaluator<DoublePoint> evaluator;
private DistanceMeasure distanceMeasure;
@Before
@ -65,7 +65,7 @@ public class CalinskiHarabaszTest {
final int k = i + 2;
KMeansPlusPlusClusterer<DoublePoint> kMeans = new KMeansPlusPlusClusterer<>(k, -1, distanceMeasure, rnd);
List<CentroidCluster<DoublePoint>> clusters = kMeans.cluster(points);
double score = evaluator.compute(clusters);
double score = evaluator.score(clusters);
if (score > expectBestScore) {
expectBestScore = score;
}
@ -91,7 +91,7 @@ public class CalinskiHarabaszTest {
final int k = i + 2;
KMeansPlusPlusClusterer<DoublePoint> kMeans = new KMeansPlusPlusClusterer<>(k, -1, distanceMeasure, rnd);
List<CentroidCluster<DoublePoint>> clusters = kMeans.cluster(points);
double score = evaluator.compute(clusters);
double score = evaluator.score(clusters);
if (score > expectBestScore) {
expectBestScore = score;
}