MATH-1525: Make "EmptyClusterStrategy" and related logic in "KMeansPlusPlusClusterer" reusable.

This commit is contained in:
CT 2020-03-21 21:11:17 +08:00 committed by Gilles Sadowski
parent 955f56fbe4
commit baf8d0a404
1 changed files with 42 additions and 25 deletions

View File

@ -188,6 +188,14 @@ public class KMeansPlusPlusClusterer<T extends Clusterable> extends Clusterer<T>
return emptyStrategy; return emptyStrategy;
} }
/**
* Returns the {@link CentroidInitializer} used by this instance.
* <p>
* The method declares no access modifier, so it is only visible
* to code in the same package.
*
* @return the value currently held in the {@code centroidInitializer} field
*/
CentroidInitializer getCentroidInitializer() {
return centroidInitializer;
}
/** /**
* Runs the K-means++ clustering algorithm. * Runs the K-means++ clustering algorithm.
* *
@ -219,7 +227,26 @@ public class KMeansPlusPlusClusterer<T extends Clusterable> extends Clusterer<T>
// iterate through updating the centers until we're done // iterate through updating the centers until we're done
final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations; final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations;
for (int count = 0; count < max; count++) { for (int count = 0; count < max; count++) {
boolean emptyCluster = false; boolean hasEmptyCluster = clusters.stream().anyMatch(cluster->cluster.getPoints().isEmpty());
List<CentroidCluster<T>> newClusters = adjustClustersCenters(clusters);
int changes = assignPointsToClusters(newClusters, points, assignments);
clusters = newClusters;
// if there were no more changes in the point-to-cluster assignment
// and there are no empty clusters left, return the current clusters
if (changes == 0 && !hasEmptyCluster) {
return clusters;
}
}
return clusters;
}
/**
* Adjust the clusters' centers with the means of their points
* @param clusters the original clusters
* @return adjusted clusters with center points
*/
List<CentroidCluster<T>> adjustClustersCenters(List<CentroidCluster<T>> clusters) {
List<CentroidCluster<T>> newClusters = new ArrayList<>(); List<CentroidCluster<T>> newClusters = new ArrayList<>();
for (final CentroidCluster<T> cluster : clusters) { for (final CentroidCluster<T> cluster : clusters) {
final Clusterable newCenter; final Clusterable newCenter;
@ -237,22 +264,12 @@ public class KMeansPlusPlusClusterer<T extends Clusterable> extends Clusterer<T>
default : default :
throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS); throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
} }
emptyCluster = true;
} else { } else {
newCenter = cluster.centroid(); newCenter = cluster.centroid();
} }
newClusters.add(new CentroidCluster<T>(newCenter)); newClusters.add(new CentroidCluster<>(newCenter));
} }
int changes = assignPointsToClusters(newClusters, points, assignments); return newClusters;
clusters = newClusters;
// if there were no more changes in the point-to-cluster assignment
// and there are no empty clusters left, return the current clusters
if (changes == 0 && !emptyCluster) {
return clusters;
}
}
return clusters;
} }
/** /**