SOLR-9293: Solrj client support for hierarchical clusters and other topics marker.

This commit is contained in:
Dawid Weiss 2016-11-07 14:34:17 +01:00
parent cc99815dcb
commit 7fb72bfe10
5 changed files with 107 additions and 73 deletions

View File

@ -81,6 +81,9 @@ Detailed Change List
New Features
----------------------
* SOLR-9293: Solrj client support for hierarchical clusters and other topics
marker. (Dawid Weiss)
* SOLR-9681: FacetModule / JSON Facet API added the ability to add filters directly to
any facet command. The filters are applied after any domain change operations.
Example: { type:terms, field:category, filter:"user:yonik" }

View File

@ -16,7 +16,9 @@
*/
package org.apache.solr.client.solrj.response;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
/**
* This class represents a cluster of Solr Docs .
@ -28,41 +30,43 @@ public class Cluster {
private List<String> labels;
private double score;
private List<String> docIds;
private List<Cluster> subclusters;
private boolean otherTopics;
public Cluster(List<String> labels, double score, List<String> docIds) {
this(labels, score, docIds, Collections.emptyList(), false);
}
/**
* @param labels the list of human readable labels associated to the cluster
* @param score the score produced by the clustering algorithm for the current cluster
* @param docIds the list of document Ids belonging to the cluster
*/
public Cluster(List<String> labels, double score, List<String> docIds) {
public Cluster(List<String> labels, double score, List<String> docIds, List<Cluster> subclusters, boolean otherTopics) {
this.labels = labels;
this.score = score;
this.docIds = docIds;
this.subclusters = subclusters;
this.otherTopics = otherTopics;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Cluster)) return false;
return o != null &&
this.getClass().isInstance(o) &&
equalsTo((Cluster) o);
}
Cluster cluster = (Cluster) o;
if (Double.compare(cluster.score, score) != 0) return false;
if (!docIds.equals(cluster.docIds)) return false;
if (!labels.equals(cluster.labels)) return false;
return true;
private boolean equalsTo(Cluster o) {
return Double.compare(o.score, score) == 0 &&
Objects.equals(o.docIds, docIds) &&
Objects.equals(o.labels, labels) &&
Objects.equals(o.subclusters, subclusters);
}
@Override
public int hashCode() {
int result;
long temp;
result = labels.hashCode();
temp = Double.doubleToLongBits(score);
result = 31 * result + (int) (temp ^ (temp >>> 32));
result = 31 * result + docIds.hashCode();
return result;
return Objects.hash(subclusters, docIds, labels, score);
}
public List<String> getLabels() {
@ -89,5 +93,15 @@ public class Cluster {
this.docIds = docIds;
}
public List<Cluster> getSubclusters() {
return subclusters;
}
/**
* @return If <code>true</code>, the cluster contains references to documents that are not semantically associated
* and form a group of documents not related to any other cluster (or themselves).
*/
public boolean isOtherTopics() {
return otherTopics;
}
}

View File

@ -15,8 +15,10 @@
* limitations under the License.
*/
package org.apache.solr.client.solrj.response;
import java.util.LinkedList;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.solr.common.util.NamedList;
@ -24,21 +26,47 @@ import org.apache.solr.common.util.NamedList;
* Encapsulates responses from ClusteringComponent
*/
public class ClusteringResponse {
private static final String CLUSTERS_NODE = "clusters";
private static final String LABELS_NODE = "labels";
private static final String DOCS_NODE = "docs";
private static final String SCORE_NODE = "score";
private List<Cluster> clusters = new LinkedList<Cluster>();
private static final String IS_OTHER_TOPICS = "other-topics";
private List<Cluster> clusters;
@SuppressWarnings("unchecked")
public ClusteringResponse(List<NamedList<Object>> clusterInfo) {
clusters = new ArrayList<Cluster>();
for (NamedList<Object> clusterNode : clusterInfo) {
List<String> labelList;
List<String> docIdList;
labelList = (List<String>) clusterNode.get(LABELS_NODE);
double score = (double) clusterNode.get(SCORE_NODE);
docIdList = (List<String>) clusterNode.get(DOCS_NODE);
Cluster currentCluster = new Cluster(labelList, score, docIdList);
clusters.add(currentCluster);
List<String> labelList, docIdList;
List<Cluster> subclusters = Collections.emptyList();
labelList = docIdList = Collections.emptyList();
Double score = 0d;
boolean otherTopics = false;
for (Map.Entry<String, ?> e : clusterNode) {
switch (e.getKey()) {
case LABELS_NODE:
labelList = (List<String>) e.getValue();
break;
case DOCS_NODE:
docIdList = (List<String>) e.getValue();
break;
case SCORE_NODE:
score = (Double) e.getValue();
break;
case CLUSTERS_NODE:
subclusters = new ClusteringResponse((List<NamedList<Object>>) e.getValue()).getClusters();
break;
case IS_OTHER_TOPICS:
otherTopics = (Boolean) e.getValue();
break;
}
}
clusters.add(new Cluster(labelList, score, docIdList, subclusters, otherTopics));
}
}

View File

@ -58,6 +58,25 @@
<str>id2</str>
<str>id3</str>
</arr>
<arr name="clusters">
<lst>
<arr name="labels">
<str>label1.sub1</str>
</arr>
<arr name="docs">
<str>id1</str>
<str>id2</str>
</arr>
</lst>
<lst>
<arr name="labels">
<str>label1.sub2</str>
</arr>
<arr name="docs">
<str>id2</str>
</arr>
</lst>
</arr>
</lst>
<lst>
<arr name="labels">

View File

@ -19,7 +19,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.LinkedList;
import java.util.Arrays;
import java.util.List;
import org.apache.solr.SolrJettyTestBase;
@ -49,51 +49,21 @@ public class TestClusteringResponse extends SolrJettyTestBase {
List<Cluster> clusters = clusteringResponse.getClusters();
Assert.assertEquals(4, clusters.size());
//First Cluster
Cluster cluster1 = clusters.get(0);
List<String> expectedLabel1 = new LinkedList<String>();
expectedLabel1.add("label1");
List<String> expectedDocs1 = new LinkedList<String>();
expectedDocs1.add("id1");
expectedDocs1.add("id2");
expectedDocs1.add("id3");
Assert.assertEquals(expectedLabel1, cluster1.getLabels());
Assert.assertEquals(expectedDocs1, cluster1.getDocs());
Assert.assertEquals(expectedLabel1, cluster1.getLabels());
Assert.assertEquals(0.6, cluster1.getScore(), 0);
//Second Cluster
Cluster cluster2 = clusters.get(1);
List<String> expectedLabel2 = new LinkedList<String>();
expectedLabel2.add("label2");
List<String> expectedDocs2 = new LinkedList<String>();
expectedDocs2.add("id5");
expectedDocs2.add("id6");
Assert.assertEquals(expectedLabel2, cluster2.getLabels());
Assert.assertEquals(expectedDocs2, cluster2.getDocs());
Assert.assertEquals(expectedLabel2, cluster2.getLabels());
Assert.assertEquals(0.93, cluster2.getScore(), 0);
//Third Cluster
Cluster cluster3 = clusters.get(2);
List<String> expectedLabel3 = new LinkedList<String>();
expectedLabel3.add("label3");
List<String> expectedDocs3 = new LinkedList<String>();
expectedDocs3.add("id7");
expectedDocs3.add("id8");
Assert.assertEquals(expectedLabel3, cluster3.getLabels());
Assert.assertEquals(expectedDocs3, cluster3.getDocs());
Assert.assertEquals(expectedLabel3, cluster3.getLabels());
Assert.assertEquals(1.26, cluster3.getScore(), 0);
//Fourth Cluster
Cluster cluster4 = clusters.get(3);
List<String> expectedLabel4 = new LinkedList<String>();
expectedLabel4.add("label4");
List<String> expectedDocs4 = new LinkedList<String>();
expectedDocs4.add("id9");
Assert.assertEquals(expectedLabel4, cluster4.getLabels());
Assert.assertEquals(expectedDocs4, cluster4.getDocs());
Assert.assertEquals(expectedLabel4, cluster4.getLabels());
Assert.assertEquals(0.0, cluster4.getScore(), 0);
checkCluster(clusters.get(0), Arrays.asList("label1"), Arrays.asList("id1", "id2", "id3"), 0.6d, false);
checkCluster(clusters.get(1), Arrays.asList("label2"), Arrays.asList("id5", "id6"), 0.93d, false);
checkCluster(clusters.get(2), Arrays.asList("label3"), Arrays.asList("id7", "id8"), 1.26d, false);
checkCluster(clusters.get(3), Arrays.asList("label4"), Arrays.asList("id9"), 0d, true);
List<Cluster> sub = clusters.get(0).getSubclusters();
checkCluster(sub.get(0), Arrays.asList("label1.sub1"), Arrays.asList("id1", "id2"), 0.0d, false);
checkCluster(sub.get(1), Arrays.asList("label1.sub2"), Arrays.asList("id2"), 0.0d, false);
assertEquals(sub.size(), 2);
}
private void checkCluster(Cluster cluster, List<String> labels, List<String> docRefs, double score, boolean otherTopics) {
Assert.assertEquals(cluster.getLabels(), labels);
Assert.assertEquals(cluster.getDocs(), docRefs);
Assert.assertTrue(Double.compare(cluster.getScore(), score) == 0);
Assert.assertEquals(otherTopics, cluster.isOtherTopics());
}
}