MATH-1525: Make "EmptyClusterStrategy" and related logic in "KMeansPlusPlusClusterer" reusable.

This commit is contained in:
CT 2020-03-21 21:11:17 +08:00 committed by Gilles Sadowski
parent 955f56fbe4
commit baf8d0a404
1 changed files with 42 additions and 25 deletions

View File

@ -188,6 +188,14 @@ public class KMeansPlusPlusClusterer<T extends Clusterable> extends Clusterer<T>
return emptyStrategy;
}
/**
 * Returns the centroid initializer held by this clusterer.
 *
 * @return the {@code CentroidInitializer} stored in this instance
 */
CentroidInitializer getCentroidInitializer() {
    return this.centroidInitializer;
}
/**
* Runs the K-means++ clustering algorithm.
*
@ -219,7 +227,26 @@ public class KMeansPlusPlusClusterer<T extends Clusterable> extends Clusterer<T>
// iterate through updating the centers until we're done
final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations;
for (int count = 0; count < max; count++) {
boolean emptyCluster = false;
boolean hasEmptyCluster = clusters.stream().anyMatch(cluster->cluster.getPoints().isEmpty());
List<CentroidCluster<T>> newClusters = adjustClustersCenters(clusters);
int changes = assignPointsToClusters(newClusters, points, assignments);
clusters = newClusters;
// if there were no more changes in the point-to-cluster assignment
// and there are no empty clusters left, return the current clusters
if (changes == 0 && !hasEmptyCluster) {
return clusters;
}
}
return clusters;
}
/**
* Adjust the clusters' centers with the means of their points.
* @param clusters the original clusters
* @return adjusted clusters with center points
*/
List<CentroidCluster<T>> adjustClustersCenters(List<CentroidCluster<T>> clusters) {
List<CentroidCluster<T>> newClusters = new ArrayList<>();
for (final CentroidCluster<T> cluster : clusters) {
final Clusterable newCenter;
@ -237,22 +264,12 @@ public class KMeansPlusPlusClusterer<T extends Clusterable> extends Clusterer<T>
default :
throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
}
emptyCluster = true;
} else {
newCenter = cluster.centroid();
}
newClusters.add(new CentroidCluster<T>(newCenter));
newClusters.add(new CentroidCluster<>(newCenter));
}
int changes = assignPointsToClusters(newClusters, points, assignments);
clusters = newClusters;
// if there were no more changes in the point-to-cluster assignment
// and there are no empty clusters left, return the current clusters
if (changes == 0 && !emptyCluster) {
return clusters;
}
}
return clusters;
return newClusters;
}
/**