MATH-1525: Make "EmptyClusterStrategy" and related logic in "KMeansPlusPlusClusterer" reusable.
This commit is contained in:
parent
955f56fbe4
commit
baf8d0a404
|
@ -188,6 +188,14 @@ public class KMeansPlusPlusClusterer<T extends Clusterable> extends Clusterer<T>
|
||||||
return emptyStrategy;
|
return emptyStrategy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the CentroidInitializer used by this instance.
|
||||||
|
* @return the CentroidInitializer
|
||||||
|
*/
|
||||||
|
CentroidInitializer getCentroidInitializer() {
|
||||||
|
return centroidInitializer;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Runs the K-means++ clustering algorithm.
|
* Runs the K-means++ clustering algorithm.
|
||||||
*
|
*
|
||||||
|
@ -219,7 +227,26 @@ public class KMeansPlusPlusClusterer<T extends Clusterable> extends Clusterer<T>
|
||||||
// iterate through updating the centers until we're done
|
// iterate through updating the centers until we're done
|
||||||
final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations;
|
final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations;
|
||||||
for (int count = 0; count < max; count++) {
|
for (int count = 0; count < max; count++) {
|
||||||
boolean emptyCluster = false;
|
boolean hasEmptyCluster = clusters.stream().anyMatch(cluster->cluster.getPoints().isEmpty());
|
||||||
|
List<CentroidCluster<T>> newClusters = adjustClustersCenters(clusters);
|
||||||
|
int changes = assignPointsToClusters(newClusters, points, assignments);
|
||||||
|
clusters = newClusters;
|
||||||
|
|
||||||
|
// if there were no more changes in the point-to-cluster assignment
|
||||||
|
// and there are no empty clusters left, return the current clusters
|
||||||
|
if (changes == 0 && !hasEmptyCluster) {
|
||||||
|
return clusters;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return clusters;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adjust the clusters' centers to the means of their points
|
||||||
|
* @param clusters the original clusters
|
||||||
|
* @return adjusted clusters with center points
|
||||||
|
*/
|
||||||
|
List<CentroidCluster<T>> adjustClustersCenters(List<CentroidCluster<T>> clusters) {
|
||||||
List<CentroidCluster<T>> newClusters = new ArrayList<>();
|
List<CentroidCluster<T>> newClusters = new ArrayList<>();
|
||||||
for (final CentroidCluster<T> cluster : clusters) {
|
for (final CentroidCluster<T> cluster : clusters) {
|
||||||
final Clusterable newCenter;
|
final Clusterable newCenter;
|
||||||
|
@ -237,22 +264,12 @@ public class KMeansPlusPlusClusterer<T extends Clusterable> extends Clusterer<T>
|
||||||
default :
|
default :
|
||||||
throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
|
throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
|
||||||
}
|
}
|
||||||
emptyCluster = true;
|
|
||||||
} else {
|
} else {
|
||||||
newCenter = cluster.centroid();
|
newCenter = cluster.centroid();
|
||||||
}
|
}
|
||||||
newClusters.add(new CentroidCluster<T>(newCenter));
|
newClusters.add(new CentroidCluster<>(newCenter));
|
||||||
}
|
}
|
||||||
int changes = assignPointsToClusters(newClusters, points, assignments);
|
return newClusters;
|
||||||
clusters = newClusters;
|
|
||||||
|
|
||||||
// if there were no more changes in the point-to-cluster assignment
|
|
||||||
// and there are no empty clusters left, return the current clusters
|
|
||||||
if (changes == 0 && !emptyCluster) {
|
|
||||||
return clusters;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return clusters;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue