Merge pull request #7503 from alimate/BAEL-3070

BAEL-3070: K-Means Clustering Code Samples

commit 181943a6f8

@@ -1,5 +1,5 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <artifactId>algorithms-miscellaneous-3</artifactId>
    <version>0.0.1-SNAPSHOT</version>
@@ -18,17 +18,28 @@
            <version>${org.assertj.core.version}</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-collections4</artifactId>
            <version>${commons-collections4.version}</version>
        </dependency>

        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>${guava.version}</version>
        </dependency>

        <dependency>
            <groupId>com.squareup.retrofit2</groupId>
            <artifactId>retrofit</artifactId>
            <version>${retrofit.version}</version>
        </dependency>
        <dependency>
            <groupId>com.squareup.retrofit2</groupId>
            <artifactId>converter-jackson</artifactId>
            <version>${retrofit.version}</version>
        </dependency>

        <dependency>
@@ -61,5 +72,6 @@
        <org.assertj.core.version>3.9.0</org.assertj.core.version>
        <commons-collections4.version>4.3</commons-collections4.version>
        <guava.version>28.0-jre</guava.version>
        <retrofit.version>2.6.0</retrofit.version>
    </properties>
</project>
@@ -0,0 +1,45 @@
package com.baeldung.algorithms.kmeans;

import java.util.Map;
import java.util.Objects;

/**
 * Encapsulates all coordinates for a particular cluster centroid.
 */
public class Centroid {

    /**
     * The centroid coordinates.
     */
    private final Map<String, Double> coordinates;

    public Centroid(Map<String, Double> coordinates) {
        this.coordinates = coordinates;
    }

    public Map<String, Double> getCoordinates() {
        return coordinates;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        Centroid centroid = (Centroid) o;
        return Objects.equals(getCoordinates(), centroid.getCoordinates());
    }

    @Override
    public int hashCode() {
        return Objects.hash(getCoordinates());
    }

    @Override
    public String toString() {
        return "Centroid " + coordinates;
    }
}
@@ -0,0 +1,20 @@
package com.baeldung.algorithms.kmeans;

import java.util.Map;

/**
 * Defines a contract to calculate the distance between two feature vectors. The smaller the
 * calculated distance, the more similar the two items are.
 */
public interface Distance {

    /**
     * Calculates the distance between two feature vectors.
     *
     * @param f1 The first set of features.
     * @param f2 The second set of features.
     * @return Calculated distance.
     * @throws IllegalArgumentException If the given feature vectors are invalid.
     */
    double calculate(Map<String, Double> f1, Map<String, Double> f2);
}
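Note: any metric that honors this contract can be plugged into the clustering code that follows. As a minimal illustration (hypothetical, not part of this change), a Manhattan-distance implementation of the same interface might look like this:

package com.baeldung.algorithms.kmeans;

import java.util.Map;

// Hypothetical example, not part of this PR: an alternative Distance
// implementation using the Manhattan (L1) metric, to illustrate the contract.
public class ManhattanDistance implements Distance {

    @Override
    public double calculate(Map<String, Double> f1, Map<String, Double> f2) {
        if (f1 == null || f2 == null) {
            throw new IllegalArgumentException("Feature vectors can't be null");
        }

        double sum = 0;
        for (String key : f1.keySet()) {
            Double v1 = f1.get(key);
            Double v2 = f2.get(key);

            // only attributes present in both vectors contribute to the distance
            if (v1 != null && v2 != null) {
                sum += Math.abs(v1 - v2);
            }
        }
        return sum;
    }
}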
@ -0,0 +1,23 @@
|
|||
package com.baeldung.algorithms.kmeans;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Encapsulates methods to calculates errors between centroid and the cluster members.
|
||||
*/
|
||||
public class Errors {
|
||||
|
||||
public static double sse(Map<Centroid, List<Record>> clustered, Distance distance) {
|
||||
double sum = 0;
|
||||
for (Map.Entry<Centroid, List<Record>> entry : clustered.entrySet()) {
|
||||
Centroid centroid = entry.getKey();
|
||||
for (Record record : entry.getValue()) {
|
||||
double d = distance.calculate(centroid.getCoordinates(), record.getFeatures());
|
||||
sum += Math.pow(d, 2);
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
}
|
|
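Note: a minimal sketch (hypothetical, not part of this change) of how Errors.sse could drive an elbow-style comparison of different values of k on a tiny hand-made dataset:

package com.baeldung.algorithms.kmeans;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Hypothetical sketch: compute the sum of squared errors for several k values
// (the "elbow" heuristic) using the classes introduced in this PR.
public class SseElbowSketch {

    public static void main(String[] args) {
        List<Record> records = Arrays.asList(
            new Record("a", features(1.0, 1.0)),
            new Record("b", features(1.5, 2.0)),
            new Record("c", features(8.0, 8.0)),
            new Record("d", features(8.5, 9.0)),
            new Record("e", features(0.5, 1.5)),
            new Record("f", features(9.0, 8.5)));

        Distance distance = new EuclideanDistance();
        for (int k = 2; k <= 4; k++) {
            Map<Centroid, List<Record>> clusters = KMeans.fit(records, k, distance, 1000);
            System.out.println(k + " clusters -> SSE = " + Errors.sse(clusters, distance));
        }
    }

    private static Map<String, Double> features(double x, double y) {
        Map<String, Double> f = new HashMap<>();
        f.put("x", x);
        f.put("y", y);
        return f;
    }
}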
@@ -0,0 +1,26 @@
package com.baeldung.algorithms.kmeans;

import java.util.Map;

/**
 * Calculates the distance between two items using the Euclidean formula.
 */
public class EuclideanDistance implements Distance {

    @Override
    public double calculate(Map<String, Double> f1, Map<String, Double> f2) {
        if (f1 == null || f2 == null) {
            throw new IllegalArgumentException("Feature vectors can't be null");
        }

        double sum = 0;
        for (String key : f1.keySet()) {
            Double v1 = f1.get(key);
            Double v2 = f2.get(key);

            if (v1 != null && v2 != null) {
                sum += Math.pow(v1 - v2, 2);
            }
        }

        return Math.sqrt(sum);
    }
}
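Note: a small worked example (hypothetical, not part of this change). For the vectors (1, 2) and (4, 6) the Euclidean distance is sqrt(3^2 + 4^2) = 5.0; attributes missing from either vector are simply skipped:

package com.baeldung.algorithms.kmeans;

import java.util.HashMap;
import java.util.Map;

// Hypothetical sketch: a tiny worked example of EuclideanDistance.
public class EuclideanDistanceSketch {

    public static void main(String[] args) {
        Map<String, Double> f1 = new HashMap<>();
        f1.put("x", 1.0);
        f1.put("y", 2.0);

        Map<String, Double> f2 = new HashMap<>();
        f2.put("x", 4.0);
        f2.put("y", 6.0);
        f2.put("z", 42.0); // not present in f1, so it doesn't contribute

        System.out.println(new EuclideanDistance().calculate(f1, f2)); // prints 5.0
    }
}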
@@ -0,0 +1,236 @@
package com.baeldung.algorithms.kmeans;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;

import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toSet;

/**
 * Encapsulates an implementation of the K-Means clustering algorithm.
 *
 * @author Ali Dehghani
 */
public class KMeans {

    private KMeans() {
        throw new IllegalAccessError("You shouldn't call this constructor");
    }

    /**
     * Will be used to generate random numbers.
     */
    private static final Random random = new Random();

    /**
     * Performs the K-Means clustering algorithm on the given dataset.
     *
     * @param records       The dataset.
     * @param k             Number of clusters.
     * @param distance      To calculate the distance between two items.
     * @param maxIterations Upper bound for the number of iterations.
     * @return K clusters along with their features.
     */
    public static Map<Centroid, List<Record>> fit(List<Record> records, int k, Distance distance, int maxIterations) {
        applyPreconditions(records, k, distance, maxIterations);

        List<Centroid> centroids = randomCentroids(records, k);
        Map<Centroid, List<Record>> clusters = new HashMap<>();
        Map<Centroid, List<Record>> lastState = new HashMap<>();

        // iterate for a pre-defined number of times
        for (int i = 0; i < maxIterations; i++) {
            boolean isLastIteration = i == maxIterations - 1;

            // in each iteration we should find the nearest centroid for each record
            for (Record record : records) {
                Centroid centroid = nearestCentroid(record, centroids, distance);
                assignToCluster(clusters, record, centroid);
            }

            // if the assignment does not change, then the algorithm terminates
            boolean shouldTerminate = isLastIteration || clusters.equals(lastState);
            lastState = clusters;
            if (shouldTerminate) {
                break;
            }

            // at the end of each iteration we should relocate the centroids
            centroids = relocateCentroids(clusters);
            clusters = new HashMap<>();
        }

        return lastState;
    }

    /**
     * Move all cluster centroids to the average of all assigned features.
     *
     * @param clusters The current cluster configuration.
     * @return Collection of new and relocated centroids.
     */
    private static List<Centroid> relocateCentroids(Map<Centroid, List<Record>> clusters) {
        return clusters
          .entrySet()
          .stream()
          .map(e -> average(e.getKey(), e.getValue()))
          .collect(toList());
    }

    /**
     * Moves the given centroid to the average position of all assigned features. If
     * the centroid has no feature in its cluster, then there would be no need for a
     * relocation. Otherwise, for each entry we calculate the average of all records
     * first by summing all the entries and then dividing the final summation value by
     * the number of records.
     *
     * @param centroid The centroid to move.
     * @param records  The assigned features.
     * @return The moved centroid.
     */
    private static Centroid average(Centroid centroid, List<Record> records) {
        // if this cluster is empty, then we shouldn't move the centroid
        if (records == null || records.isEmpty()) {
            return centroid;
        }

        // Since some records don't have all possible attributes, we initialize
        // average coordinates equal to current centroid coordinates
        Map<String, Double> average = centroid.getCoordinates();

        // The average function works correctly if we clear all coordinates corresponding
        // to present record attributes
        records
          .stream()
          .flatMap(e -> e
            .getFeatures()
            .keySet()
            .stream())
          .forEach(k -> average.put(k, 0.0));

        for (Record record : records) {
            record
              .getFeatures()
              .forEach((k, v) -> average.compute(k, (k1, currentValue) -> v + currentValue));
        }

        average.forEach((k, v) -> average.put(k, v / records.size()));

        return new Centroid(average);
    }

    /**
     * Assigns a feature vector to the given centroid. If this is the first assignment for this centroid,
     * first we should create the list.
     *
     * @param clusters The current cluster configuration.
     * @param record   The feature vector.
     * @param centroid The centroid.
     */
    private static void assignToCluster(Map<Centroid, List<Record>> clusters, Record record, Centroid centroid) {
        clusters.compute(centroid, (key, list) -> {
            if (list == null) {
                list = new ArrayList<>();
            }

            list.add(record);
            return list;
        });
    }

    /**
     * With the help of the given distance calculator, iterates through centroids and finds the
     * nearest one to the given record.
     *
     * @param record    The feature vector to find a centroid for.
     * @param centroids Collection of all centroids.
     * @param distance  To calculate the distance between two items.
     * @return The nearest centroid to the given feature vector.
     */
    private static Centroid nearestCentroid(Record record, List<Centroid> centroids, Distance distance) {
        double minimumDistance = Double.MAX_VALUE;
        Centroid nearest = null;

        for (Centroid centroid : centroids) {
            double currentDistance = distance.calculate(record.getFeatures(), centroid.getCoordinates());

            if (currentDistance < minimumDistance) {
                minimumDistance = currentDistance;
                nearest = centroid;
            }
        }

        return nearest;
    }

    /**
     * Generates k random centroids. Before kicking off the centroid generation process,
     * first we calculate the possible value range for each attribute. Then when
     * we're going to generate the centroids, we generate random coordinates in
     * the [min, max] range for each attribute.
     *
     * @param records The dataset which helps to calculate the [min, max] range for
     *                each attribute.
     * @param k       Number of clusters.
     * @return Collections of randomly generated centroids.
     */
    private static List<Centroid> randomCentroids(List<Record> records, int k) {
        List<Centroid> centroids = new ArrayList<>();
        Map<String, Double> maxs = new HashMap<>();
        Map<String, Double> mins = new HashMap<>();

        for (Record record : records) {
            record
              .getFeatures()
              .forEach((key, value) -> {
                  // compare the value with the current max and choose the bigger value between them
                  maxs.compute(key, (k1, max) -> max == null || value > max ? value : max);

                  // compare the value with the current min and choose the smaller value between them
                  mins.compute(key, (k1, min) -> min == null || value < min ? value : min);
              });
        }

        Set<String> attributes = records
          .stream()
          .flatMap(e -> e
            .getFeatures()
            .keySet()
            .stream())
          .collect(toSet());
        for (int i = 0; i < k; i++) {
            Map<String, Double> coordinates = new HashMap<>();
            for (String attribute : attributes) {
                double max = maxs.get(attribute);
                double min = mins.get(attribute);
                coordinates.put(attribute, random.nextDouble() * (max - min) + min);
            }

            centroids.add(new Centroid(coordinates));
        }

        return centroids;
    }

    private static void applyPreconditions(List<Record> records, int k, Distance distance, int maxIterations) {
        if (records == null || records.isEmpty()) {
            throw new IllegalArgumentException("The dataset can't be empty");
        }

        if (k <= 1) {
            throw new IllegalArgumentException("It doesn't make sense to have less than or equal to 1 cluster");
        }

        if (distance == null) {
            throw new IllegalArgumentException("The distance calculator is required");
        }

        if (maxIterations <= 0) {
            throw new IllegalArgumentException("Max iterations should be a positive number");
        }
    }
}
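Note: a minimal usage sketch (hypothetical, not part of this change) that clusters a handful of 2-D points into two groups with KMeans.fit and prints each centroid with its members:

package com.baeldung.algorithms.kmeans;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Hypothetical sketch: fit two clusters on a toy dataset and print the result.
public class KMeansSketch {

    public static void main(String[] args) {
        List<Record> records = Arrays.asList(
            new Record("p1", point(1.0, 1.0)),
            new Record("p2", point(1.2, 0.8)),
            new Record("p3", point(9.0, 9.5)),
            new Record("p4", point(8.8, 9.1)));

        Map<Centroid, List<Record>> clusters = KMeans.fit(records, 2, new EuclideanDistance(), 100);

        clusters.forEach((centroid, members) -> {
            System.out.println(centroid);
            members.forEach(member -> System.out.println("  " + member));
        });
    }

    private static Map<String, Double> point(double x, double y) {
        Map<String, Double> features = new HashMap<>();
        features.put("x", x);
        features.put("y", y);
        return features;
    }
}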
@@ -0,0 +1,144 @@
package com.baeldung.algorithms.kmeans;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import com.fasterxml.jackson.databind.ObjectMapper;
import okhttp3.OkHttpClient;
import retrofit2.Retrofit;
import retrofit2.converter.jackson.JacksonConverterFactory;

import static java.util.stream.Collectors.toSet;

public class LastFm {

    private static OkHttpClient okHttp = new OkHttpClient.Builder()
      .addInterceptor(new LastFmService.Authenticator("put your API key here"))
      .build();

    private static Retrofit retrofit = new Retrofit.Builder()
      .client(okHttp)
      .addConverterFactory(JacksonConverterFactory.create())
      .baseUrl("http://ws.audioscrobbler.com/")
      .build();

    private static LastFmService lastFm = retrofit.create(LastFmService.class);

    private static ObjectMapper mapper = new ObjectMapper();

    public static void main(String[] args) throws IOException {
        List<String> artists = getTop100Artists();
        Set<String> tags = getTop100Tags();
        List<Record> records = datasetWithTaggedArtists(artists, tags);

        Map<Centroid, List<Record>> clusters = KMeans.fit(records, 7, new EuclideanDistance(), 1000);
        // Print the cluster configuration
        clusters.forEach((key, value) -> {
            System.out.println("------------------------------ CLUSTER -----------------------------------");

            System.out.println(sortedCentroid(key));
            String members = String.join(", ", value
              .stream()
              .map(Record::getDescription)
              .collect(toSet()));
            System.out.print(members);

            System.out.println();
            System.out.println();
        });

        Map<String, Object> json = convertToD3CompatibleMap(clusters);
        System.out.println(mapper.writeValueAsString(json));
    }

    private static Map<String, Object> convertToD3CompatibleMap(Map<Centroid, List<Record>> clusters) {
        Map<String, Object> json = new HashMap<>();
        json.put("name", "Musicians");
        List<Map<String, Object>> children = new ArrayList<>();
        clusters.forEach((key, value) -> {
            Map<String, Object> child = new HashMap<>();
            child.put("name", dominantGenre(sortedCentroid(key)));
            List<Map<String, String>> nested = new ArrayList<>();
            for (Record record : value) {
                nested.add(Collections.singletonMap("name", record.getDescription()));
            }
            child.put("children", nested);

            children.add(child);
        });
        json.put("children", children);
        return json;
    }

    private static String dominantGenre(Centroid centroid) {
        return centroid
          .getCoordinates()
          .keySet()
          .stream()
          .limit(2)
          .collect(Collectors.joining(", "));
    }

    private static Centroid sortedCentroid(Centroid key) {
        List<Map.Entry<String, Double>> entries = new ArrayList<>(key
          .getCoordinates()
          .entrySet());
        entries.sort((e1, e2) -> e2
          .getValue()
          .compareTo(e1.getValue()));

        Map<String, Double> sorted = new LinkedHashMap<>();
        for (Map.Entry<String, Double> entry : entries) {
            sorted.put(entry.getKey(), entry.getValue());
        }

        return new Centroid(sorted);
    }

    private static List<Record> datasetWithTaggedArtists(List<String> artists, Set<String> topTags) throws IOException {
        List<Record> records = new ArrayList<>();
        for (String artist : artists) {
            Map<String, Double> tags = lastFm
              .topTagsFor(artist)
              .execute()
              .body()
              .all();

            // Only keep popular tags.
            tags
              .entrySet()
              .removeIf(e -> !topTags.contains(e.getKey()));

            records.add(new Record(artist, tags));
        }
        return records;
    }

    private static Set<String> getTop100Tags() throws IOException {
        return lastFm
          .topTags()
          .execute()
          .body()
          .all();
    }

    private static List<String> getTop100Artists() throws IOException {
        List<String> artists = new ArrayList<>();
        for (int i = 1; i <= 2; i++) {
            artists.addAll(lastFm
              .topArtists(i)
              .execute()
              .body()
              .all());
        }

        return artists;
    }
}
@@ -0,0 +1,118 @@
package com.baeldung.algorithms.kmeans;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonProperty;
import okhttp3.HttpUrl;
import okhttp3.Interceptor;
import okhttp3.Request;
import okhttp3.Response;
import retrofit2.Call;
import retrofit2.http.GET;
import retrofit2.http.Query;

import static com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility.ANY;
import static java.util.stream.Collectors.toList;

public interface LastFmService {

    @GET("/2.0/?method=chart.gettopartists&format=json&limit=50")
    Call<Artists> topArtists(@Query("page") int page);

    @GET("/2.0/?method=artist.gettoptags&format=json&limit=20&autocorrect=1")
    Call<Tags> topTagsFor(@Query("artist") String artist);

    @GET("/2.0/?method=chart.gettoptags&format=json&limit=100")
    Call<TopTags> topTags();

    /**
     * HTTP interceptor to intercept all HTTP requests and add the API key to them.
     */
    class Authenticator implements Interceptor {

        private final String apiKey;

        Authenticator(String apiKey) {
            this.apiKey = apiKey;
        }

        @Override
        public Response intercept(Chain chain) throws IOException {
            HttpUrl url = chain
              .request()
              .url()
              .newBuilder()
              .addQueryParameter("api_key", apiKey)
              .build();
            Request request = chain
              .request()
              .newBuilder()
              .url(url)
              .build();

            return chain.proceed(request);
        }
    }

    @JsonAutoDetect(fieldVisibility = ANY)
    class TopTags {

        private Map<String, Object> tags;

        @SuppressWarnings("unchecked")
        public Set<String> all() {
            List<Map<String, Object>> topTags = (List<Map<String, Object>>) tags.get("tag");
            return topTags
              .stream()
              .map(e -> ((String) e.get("name")))
              .collect(Collectors.toSet());
        }
    }

    @JsonAutoDetect(fieldVisibility = ANY)
    class Tags {

        @JsonProperty("toptags") private Map<String, Object> topTags;

        @SuppressWarnings("unchecked")
        public Map<String, Double> all() {
            try {
                Map<String, Double> all = new HashMap<>();
                List<Map<String, Object>> tags = (List<Map<String, Object>>) topTags.get("tag");
                for (Map<String, Object> tag : tags) {
                    all.put(((String) tag.get("name")), ((Integer) tag.get("count")).doubleValue());
                }

                return all;
            } catch (Exception e) {
                return Collections.emptyMap();
            }
        }
    }

    @JsonAutoDetect(fieldVisibility = ANY)
    class Artists {

        private Map<String, Object> artists;

        @SuppressWarnings("unchecked")
        public List<String> all() {
            try {
                List<Map<String, Object>> artists = (List<Map<String, Object>>) this.artists.get("artist");
                return artists
                  .stream()
                  .map(e -> ((String) e.get("name")))
                  .collect(toList());
            } catch (Exception e) {
                return Collections.emptyList();
            }
        }
    }
}
@@ -0,0 +1,65 @@
package com.baeldung.algorithms.kmeans;

import java.util.Map;
import java.util.Objects;

/**
 * Encapsulates all feature values for a few attributes. Optionally each record
 * can be described with the {@link #description} field.
 */
public class Record {

    /**
     * The record description. For example, this can be the artist name for the famous musician
     * example.
     */
    private final String description;

    /**
     * Encapsulates all attributes and their corresponding values, i.e. features.
     */
    private final Map<String, Double> features;

    public Record(String description, Map<String, Double> features) {
        this.description = description;
        this.features = features;
    }

    public Record(Map<String, Double> features) {
        this("", features);
    }

    public String getDescription() {
        return description;
    }

    public Map<String, Double> getFeatures() {
        return features;
    }

    @Override
    public String toString() {
        String prefix = description == null || description
          .trim()
          .isEmpty() ? "Record" : description;

        return prefix + ": " + features;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        Record record = (Record) o;
        return Objects.equals(getDescription(), record.getDescription()) && Objects.equals(getFeatures(), record.getFeatures());
    }

    @Override
    public int hashCode() {
        return Objects.hash(getDescription(), getFeatures());
    }
}
File diff suppressed because it is too large
@@ -0,0 +1,490 @@
{
  "children": [
    {
      "children": [
        { "name": "Radiohead" },
        { "name": "Red Hot Chili Peppers" },
        { "name": "Coldplay" },
        { "name": "Nirvana" },
        { "name": "Panic! at the Disco" },
        { "name": "The Cure" },
        { "name": "Linkin Park" },
        { "name": "Radiohead" },
        { "name": "Red Hot Chili Peppers" },
        { "name": "Coldplay" },
        { "name": "Nirvana" },
        { "name": "Panic! at the Disco" },
        { "name": "The Cure" },
        { "name": "Linkin Park" },
        { "name": "Muse" },
        { "name": "Maroon 5" },
        { "name": "Foo Fighters" },
        { "name": "Paramore" },
        { "name": "Oasis" },
        { "name": "Fall Out Boy" },
        { "name": "OneRepublic" },
        { "name": "Weezer" },
        { "name": "System of a Down" },
        { "name": "The White Stripes" }
      ],
      "name": "rock, alternative"
    },
    {
      "children": [
        { "name": "Lil Nas X" },
        { "name": "Post Malone" },
        { "name": "Drake" },
        { "name": "Kanye West" },
        { "name": "Kendrick Lamar" },
        { "name": "Tyler, the Creator" },
        { "name": "Eminem" },
        { "name": "Childish Gambino" },
        { "name": "Frank Ocean" },
        { "name": "Lil Nas X" },
        { "name": "Post Malone" },
        { "name": "Drake" },
        { "name": "Kanye West" },
        { "name": "Kendrick Lamar" },
        { "name": "Tyler, the Creator" },
        { "name": "Eminem" },
        { "name": "Childish Gambino" },
        { "name": "Frank Ocean" },
        { "name": "Lizzo" },
        { "name": "Travi$ Scott" },
        { "name": "A$AP Rocky" },
        { "name": "Nicki Minaj" },
        { "name": "xxxtentacion" }
      ],
      "name": "Hip-Hop, rap"
    },
    {
      "children": [
        { "name": "Arctic Monkeys" },
        { "name": "Imagine Dragons" },
        { "name": "The Killers" },
        { "name": "Gorillaz" },
        { "name": "The Black Keys" },
        { "name": "Arctic Monkeys" },
        { "name": "Imagine Dragons" },
        { "name": "The Killers" },
        { "name": "Gorillaz" },
        { "name": "The Black Keys" },
        { "name": "Twenty One Pilots" },
        { "name": "Ellie Goulding" },
        { "name": "Florence + the Machine" },
        { "name": "Vampire Weekend" },
        { "name": "The Smiths" },
        { "name": "The Strokes" },
        { "name": "MGMT" },
        { "name": "Foster the People" },
        { "name": "Two Door Cinema Club" },
        { "name": "Cage the Elephant" },
        { "name": "Arcade Fire" },
        { "name": "The 1975" }
      ],
      "name": "indie, alternative"
    },
    {
      "children": [
        { "name": "Ed Sheeran" },
        { "name": "Tame Impala" },
        { "name": "Ed Sheeran" },
        { "name": "Tame Impala" },
        { "name": "Green Day" },
        { "name": "Metallica" },
        { "name": "blink-182" },
        { "name": "Bon Iver" },
        { "name": "The Clash" }
      ],
      "name": "rock, punk rock"
    },
    {
      "children": [
        { "name": "Calvin Harris" },
        { "name": "The Weeknd" },
        { "name": "The Chainsmokers" },
        { "name": "Daft Punk" },
        { "name": "Marshmello" },
        { "name": "David Guetta" },
        { "name": "Calvin Harris" },
        { "name": "The Weeknd" },
        { "name": "The Chainsmokers" },
        { "name": "Daft Punk" },
        { "name": "Marshmello" },
        { "name": "David Guetta" },
        { "name": "Avicii" },
        { "name": "Kygo" },
        { "name": "Martin Garrix" },
        { "name": "Major Lazer" },
        { "name": "Depeche Mode" }
      ],
      "name": "electronic, dance"
    },
    {
      "children": [
        { "name": "Queen" },
        { "name": "The Beatles" },
        { "name": "David Bowie" },
        { "name": "Fleetwood Mac" },
        { "name": "Pink Floyd" },
        { "name": "The Rolling Stones" },
        { "name": "Led Zeppelin" },
        { "name": "Queen" },
        { "name": "The Beatles" },
        { "name": "David Bowie" },
        { "name": "Fleetwood Mac" },
        { "name": "Pink Floyd" },
        { "name": "The Rolling Stones" },
        { "name": "Led Zeppelin" },
        { "name": "Elton John" }
      ],
      "name": "classic rock, rock"
    },
    {
      "children": [
        { "name": "Billie Eilish" },
        { "name": "Ariana Grande" },
        { "name": "Taylor Swift" },
        { "name": "Beyoncé" },
        { "name": "Shawn Mendes" },
        { "name": "Rihanna" },
        { "name": "Lana Del Rey" },
        { "name": "Katy Perry" },
        { "name": "Lady Gaga" },
        { "name": "Miley Cyrus" },
        { "name": "Mark Ronson" },
        { "name": "Madonna" },
        { "name": "Lorde" },
        { "name": "Khalid" },
        { "name": "Billie Eilish" },
        { "name": "Ariana Grande" },
        { "name": "Taylor Swift" },
        { "name": "Beyoncé" },
        { "name": "Shawn Mendes" },
        { "name": "Rihanna" },
        { "name": "Lana Del Rey" },
        { "name": "Katy Perry" },
        { "name": "Lady Gaga" },
        { "name": "Miley Cyrus" },
        { "name": "Mark Ronson" },
        { "name": "Madonna" },
        { "name": "Lorde" },
        { "name": "Khalid" },
        { "name": "Sia" },
        { "name": "Sam Smith" },
        { "name": "Halsey" },
        { "name": "Michael Jackson" },
        { "name": "Charli XCX" },
        { "name": "Britney Spears" },
        { "name": "Dua Lipa" },
        { "name": "Jonas Brothers" },
        { "name": "Bruno Mars" },
        { "name": "Carly Rae Jepsen" },
        { "name": "P!nk" },
        { "name": "Adele" }
      ],
      "name": "pop, female vocalists"
    }
  ],
  "name": "Musicians"
}
@@ -0,0 +1,68 @@
<!DOCTYPE html>
<meta charset="utf-8">
<style>
    .node circle {
        fill: #fff;
        stroke: steelblue;
        stroke-width: 1.5px;
    }

    .node {
        font: 10px sans-serif;
    }

    .link {
        fill: none;
        stroke: #ccc;
        stroke-width: 1.5px;
    }
</style>
<body>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script>
    var diameter = 1100;
    var tree = d3.layout.tree()
        .size([360, diameter / 2 - 300])
        .separation(function (a, b) {
            return (a.parent == b.parent ? 1 : 2) / a.depth;
        });
    var diagonal = d3.svg.diagonal.radial()
        .projection(function (d) {
            return [d.y, d.x / 180 * Math.PI];
        });
    var svg = d3.select("body").append("svg")
        .attr("width", diameter)
        .attr("height", diameter - 150)
        .append("g")
        .attr("transform", "translate(" + diameter / 2 + "," + diameter / 2 + ")");
    d3.json("lastfm.json", function (error, root) {
        var nodes = tree.nodes(root),
            links = tree.links(nodes);
        var link = svg.selectAll(".link")
            .data(links)
            .enter().append("path")
            .attr("class", "link")
            .attr("d", diagonal);
        var node = svg.selectAll(".node")
            .data(nodes)
            .enter().append("g")
            .attr("class", "node")
            .attr("transform", function (d) {
                return "rotate(" + (d.x - 90) + ")translate(" + d.y + ")";
            });
        node.append("circle")
            .attr("r", 4.5);
        node.append("text")
            .attr("dy", ".31em")
            .attr("text-anchor", function (d) {
                return d.x < 180 ? "start" : "end";
            })
            .attr("transform", function (d) {
                return d.x < 180 ? "translate(8)" : "rotate(180)translate(-8)";
            })
            .text(function (d) {
                return d.name;
            });
    });
    d3.select(self.frameElement).style("height", diameter - 150 + "px");
</script>