From 7fb72bfe10d84d3419b07a8782418f86ab075a56 Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Mon, 7 Nov 2016 14:34:17 +0100 Subject: [PATCH] SOLR-9293: Solrj client support for hierarchical clusters and other topics marker. --- solr/CHANGES.txt | 3 + .../solr/client/solrj/response/Cluster.java | 48 +++++++++----- .../solrj/response/ClusteringResponse.java | 48 +++++++++++--- .../solrj/sampleClusteringResponse.xml | 19 ++++++ .../response/TestClusteringResponse.java | 62 +++++-------------- 5 files changed, 107 insertions(+), 73 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 2e6487dc77c..ad022fbf35b 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -81,6 +81,9 @@ Detailed Change List New Features ---------------------- +* SOLR-9293: Solrj client support for hierarchical clusters and other topics + marker. (Dawid Weiss) + * SOLR-9681: FacetModule / JSON Facet API added the ability to add filters directly to any facet command. The filters are applied after any domain change operations. Example: { type:terms, field:category, filter:"user:yonik" } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/Cluster.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/Cluster.java index ae3e529dab4..378e1a72758 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/Cluster.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/Cluster.java @@ -16,7 +16,9 @@ */ package org.apache.solr.client.solrj.response; +import java.util.Collections; import java.util.List; +import java.util.Objects; /** * This class represents a cluster of Solr Docs . @@ -28,41 +30,43 @@ public class Cluster { private List labels; private double score; private List docIds; + private List subclusters; + private boolean otherTopics; + + public Cluster(List labels, double score, List docIds) { + this(labels, score, docIds, Collections.emptyList(), false); + } /** * @param labels the list of human readable labels associated to the cluster * @param score the score produced by the clustering algorithm for the current cluster * @param docIds the list of document Ids belonging to the cluster */ - public Cluster(List labels, double score, List docIds) { + public Cluster(List labels, double score, List docIds, List subclusters, boolean otherTopics) { this.labels = labels; this.score = score; this.docIds = docIds; + this.subclusters = subclusters; + this.otherTopics = otherTopics; } @Override public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof Cluster)) return false; + return o != null && + this.getClass().isInstance(o) && + equalsTo((Cluster) o); + } - Cluster cluster = (Cluster) o; - - if (Double.compare(cluster.score, score) != 0) return false; - if (!docIds.equals(cluster.docIds)) return false; - if (!labels.equals(cluster.labels)) return false; - - return true; + private boolean equalsTo(Cluster o) { + return Double.compare(o.score, score) == 0 && + Objects.equals(o.docIds, docIds) && + Objects.equals(o.labels, labels) && + Objects.equals(o.subclusters, subclusters); } @Override public int hashCode() { - int result; - long temp; - result = labels.hashCode(); - temp = Double.doubleToLongBits(score); - result = 31 * result + (int) (temp ^ (temp >>> 32)); - result = 31 * result + docIds.hashCode(); - return result; + return Objects.hash(subclusters, docIds, labels, score); } public List getLabels() { @@ -89,5 +93,15 @@ public class Cluster { this.docIds = docIds; } + public List getSubclusters() { + return subclusters; + } + /** + * @return If true, the cluster contains references to documents that are not semantically associated + * and form a group of documents not related to any other cluster (or themselves). + */ + public boolean isOtherTopics() { + return otherTopics; + } } \ No newline at end of file diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/ClusteringResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/ClusteringResponse.java index ad6e0484970..73afb6b9e78 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/ClusteringResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/ClusteringResponse.java @@ -15,8 +15,10 @@ * limitations under the License. */ package org.apache.solr.client.solrj.response; -import java.util.LinkedList; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Map; import org.apache.solr.common.util.NamedList; @@ -24,21 +26,47 @@ import org.apache.solr.common.util.NamedList; * Encapsulates responses from ClusteringComponent */ public class ClusteringResponse { - + private static final String CLUSTERS_NODE = "clusters"; private static final String LABELS_NODE = "labels"; private static final String DOCS_NODE = "docs"; private static final String SCORE_NODE = "score"; - private List clusters = new LinkedList(); + private static final String IS_OTHER_TOPICS = "other-topics"; + private List clusters; + @SuppressWarnings("unchecked") public ClusteringResponse(List> clusterInfo) { + clusters = new ArrayList(); for (NamedList clusterNode : clusterInfo) { - List labelList; - List docIdList; - labelList = (List) clusterNode.get(LABELS_NODE); - double score = (double) clusterNode.get(SCORE_NODE); - docIdList = (List) clusterNode.get(DOCS_NODE); - Cluster currentCluster = new Cluster(labelList, score, docIdList); - clusters.add(currentCluster); + List labelList, docIdList; + List subclusters = Collections.emptyList(); + labelList = docIdList = Collections.emptyList(); + Double score = 0d; + boolean otherTopics = false; + for (Map.Entry e : clusterNode) { + switch (e.getKey()) { + case LABELS_NODE: + labelList = (List) e.getValue(); + break; + + case DOCS_NODE: + docIdList = (List) e.getValue(); + break; + + case SCORE_NODE: + score = (Double) e.getValue(); + break; + + case CLUSTERS_NODE: + subclusters = new ClusteringResponse((List>) e.getValue()).getClusters(); + break; + + case IS_OTHER_TOPICS: + otherTopics = (Boolean) e.getValue(); + break; + } + } + + clusters.add(new Cluster(labelList, score, docIdList, subclusters, otherTopics)); } } diff --git a/solr/solrj/src/test-files/solrj/sampleClusteringResponse.xml b/solr/solrj/src/test-files/solrj/sampleClusteringResponse.xml index 16d6e4a9ed3..ea042c9195c 100644 --- a/solr/solrj/src/test-files/solrj/sampleClusteringResponse.xml +++ b/solr/solrj/src/test-files/solrj/sampleClusteringResponse.xml @@ -58,6 +58,25 @@ id2 id3 + + + + label1.sub1 + + + id1 + id2 + + + + + label1.sub2 + + + id2 + + + diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestClusteringResponse.java b/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestClusteringResponse.java index 5bc20e14c10..7e789d12aeb 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestClusteringResponse.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestClusteringResponse.java @@ -19,7 +19,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.StandardCharsets; -import java.util.LinkedList; +import java.util.Arrays; import java.util.List; import org.apache.solr.SolrJettyTestBase; @@ -49,51 +49,21 @@ public class TestClusteringResponse extends SolrJettyTestBase { List clusters = clusteringResponse.getClusters(); Assert.assertEquals(4, clusters.size()); - //First Cluster - Cluster cluster1 = clusters.get(0); - List expectedLabel1 = new LinkedList(); - expectedLabel1.add("label1"); - List expectedDocs1 = new LinkedList(); - expectedDocs1.add("id1"); - expectedDocs1.add("id2"); - expectedDocs1.add("id3"); - Assert.assertEquals(expectedLabel1, cluster1.getLabels()); - Assert.assertEquals(expectedDocs1, cluster1.getDocs()); - Assert.assertEquals(expectedLabel1, cluster1.getLabels()); - Assert.assertEquals(0.6, cluster1.getScore(), 0); - //Second Cluster - Cluster cluster2 = clusters.get(1); - List expectedLabel2 = new LinkedList(); - expectedLabel2.add("label2"); - List expectedDocs2 = new LinkedList(); - expectedDocs2.add("id5"); - expectedDocs2.add("id6"); - Assert.assertEquals(expectedLabel2, cluster2.getLabels()); - Assert.assertEquals(expectedDocs2, cluster2.getDocs()); - Assert.assertEquals(expectedLabel2, cluster2.getLabels()); - Assert.assertEquals(0.93, cluster2.getScore(), 0); - //Third Cluster - Cluster cluster3 = clusters.get(2); - List expectedLabel3 = new LinkedList(); - expectedLabel3.add("label3"); - List expectedDocs3 = new LinkedList(); - expectedDocs3.add("id7"); - expectedDocs3.add("id8"); - Assert.assertEquals(expectedLabel3, cluster3.getLabels()); - Assert.assertEquals(expectedDocs3, cluster3.getDocs()); - Assert.assertEquals(expectedLabel3, cluster3.getLabels()); - Assert.assertEquals(1.26, cluster3.getScore(), 0); - //Fourth Cluster - Cluster cluster4 = clusters.get(3); - List expectedLabel4 = new LinkedList(); - expectedLabel4.add("label4"); - List expectedDocs4 = new LinkedList(); - expectedDocs4.add("id9"); - Assert.assertEquals(expectedLabel4, cluster4.getLabels()); - Assert.assertEquals(expectedDocs4, cluster4.getDocs()); - Assert.assertEquals(expectedLabel4, cluster4.getLabels()); - Assert.assertEquals(0.0, cluster4.getScore(), 0); - + checkCluster(clusters.get(0), Arrays.asList("label1"), Arrays.asList("id1", "id2", "id3"), 0.6d, false); + checkCluster(clusters.get(1), Arrays.asList("label2"), Arrays.asList("id5", "id6"), 0.93d, false); + checkCluster(clusters.get(2), Arrays.asList("label3"), Arrays.asList("id7", "id8"), 1.26d, false); + checkCluster(clusters.get(3), Arrays.asList("label4"), Arrays.asList("id9"), 0d, true); + + List sub = clusters.get(0).getSubclusters(); + checkCluster(sub.get(0), Arrays.asList("label1.sub1"), Arrays.asList("id1", "id2"), 0.0d, false); + checkCluster(sub.get(1), Arrays.asList("label1.sub2"), Arrays.asList("id2"), 0.0d, false); + assertEquals(sub.size(), 2); } + private void checkCluster(Cluster cluster, List labels, List docRefs, double score, boolean otherTopics) { + Assert.assertEquals(cluster.getLabels(), labels); + Assert.assertEquals(cluster.getDocs(), docRefs); + Assert.assertTrue(Double.compare(cluster.getScore(), score) == 0); + Assert.assertEquals(otherTopics, cluster.isOtherTopics()); + } }