SOLR-9293: Solrj client support for hierarchical clusters and other topics marker.

This commit is contained in:
Dawid Weiss 2016-11-07 14:34:17 +01:00
parent cc99815dcb
commit 7fb72bfe10
5 changed files with 107 additions and 73 deletions

View File

@ -81,6 +81,9 @@ Detailed Change List
New Features New Features
---------------------- ----------------------
* SOLR-9293: SolrJ client support for hierarchical clusters and the "other topics"
cluster marker. (Dawid Weiss)
* SOLR-9681: FacetModule / JSON Facet API added the ability to add filters directly to * SOLR-9681: FacetModule / JSON Facet API added the ability to add filters directly to
any facet command. The filters are applied after any domain change operations. any facet command. The filters are applied after any domain change operations.
Example: { type:terms, field:category, filter:"user:yonik" } Example: { type:terms, field:category, filter:"user:yonik" }

View File

@ -16,7 +16,9 @@
*/ */
package org.apache.solr.client.solrj.response; package org.apache.solr.client.solrj.response;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Objects;
/** /**
* This class represents a cluster of Solr Docs. * This class represents a cluster of Solr Docs.
@ -28,41 +30,43 @@ public class Cluster {
private List<String> labels; private List<String> labels;
private double score; private double score;
private List<String> docIds; private List<String> docIds;
private List<Cluster> subclusters;
private boolean otherTopics;
public Cluster(List<String> labels, double score, List<String> docIds) {
this(labels, score, docIds, Collections.emptyList(), false);
}
/** /**
* @param labels the list of human readable labels associated to the cluster * @param labels the list of human readable labels associated to the cluster
* @param score the score produced by the clustering algorithm for the current cluster * @param score the score produced by the clustering algorithm for the current cluster
* @param docIds the list of document Ids belonging to the cluster * @param docIds the list of document Ids belonging to the cluster
*/ */
public Cluster(List<String> labels, double score, List<String> docIds) { public Cluster(List<String> labels, double score, List<String> docIds, List<Cluster> subclusters, boolean otherTopics) {
this.labels = labels; this.labels = labels;
this.score = score; this.score = score;
this.docIds = docIds; this.docIds = docIds;
this.subclusters = subclusters;
this.otherTopics = otherTopics;
} }
@Override @Override
public boolean equals(Object o) { public boolean equals(Object o) {
if (this == o) return true; return o != null &&
if (!(o instanceof Cluster)) return false; this.getClass().isInstance(o) &&
equalsTo((Cluster) o);
}
Cluster cluster = (Cluster) o; private boolean equalsTo(Cluster o) {
return Double.compare(o.score, score) == 0 &&
if (Double.compare(cluster.score, score) != 0) return false; Objects.equals(o.docIds, docIds) &&
if (!docIds.equals(cluster.docIds)) return false; Objects.equals(o.labels, labels) &&
if (!labels.equals(cluster.labels)) return false; Objects.equals(o.subclusters, subclusters);
return true;
} }
@Override @Override
public int hashCode() { public int hashCode() {
int result; return Objects.hash(subclusters, docIds, labels, score);
long temp;
result = labels.hashCode();
temp = Double.doubleToLongBits(score);
result = 31 * result + (int) (temp ^ (temp >>> 32));
result = 31 * result + docIds.hashCode();
return result;
} }
public List<String> getLabels() { public List<String> getLabels() {
@ -89,5 +93,15 @@ public class Cluster {
this.docIds = docIds; this.docIds = docIds;
} }
public List<Cluster> getSubclusters() {
return subclusters;
}
/**
* @return If <code>true</code>, the cluster contains references to documents that are not semantically associated
* with each other and that form a group of documents not related to any other cluster.
*/
public boolean isOtherTopics() {
return otherTopics;
}
} }

View File

@ -15,8 +15,10 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.solr.client.solrj.response; package org.apache.solr.client.solrj.response;
import java.util.LinkedList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
@ -24,21 +26,47 @@ import org.apache.solr.common.util.NamedList;
* Encapsulates responses from ClusteringComponent * Encapsulates responses from ClusteringComponent
*/ */
public class ClusteringResponse { public class ClusteringResponse {
private static final String CLUSTERS_NODE = "clusters";
private static final String LABELS_NODE = "labels"; private static final String LABELS_NODE = "labels";
private static final String DOCS_NODE = "docs"; private static final String DOCS_NODE = "docs";
private static final String SCORE_NODE = "score"; private static final String SCORE_NODE = "score";
private List<Cluster> clusters = new LinkedList<Cluster>(); private static final String IS_OTHER_TOPICS = "other-topics";
private List<Cluster> clusters;
@SuppressWarnings("unchecked")
public ClusteringResponse(List<NamedList<Object>> clusterInfo) { public ClusteringResponse(List<NamedList<Object>> clusterInfo) {
clusters = new ArrayList<Cluster>();
for (NamedList<Object> clusterNode : clusterInfo) { for (NamedList<Object> clusterNode : clusterInfo) {
List<String> labelList; List<String> labelList, docIdList;
List<String> docIdList; List<Cluster> subclusters = Collections.emptyList();
labelList = (List<String>) clusterNode.get(LABELS_NODE); labelList = docIdList = Collections.emptyList();
double score = (double) clusterNode.get(SCORE_NODE); Double score = 0d;
docIdList = (List<String>) clusterNode.get(DOCS_NODE); boolean otherTopics = false;
Cluster currentCluster = new Cluster(labelList, score, docIdList); for (Map.Entry<String, ?> e : clusterNode) {
clusters.add(currentCluster); switch (e.getKey()) {
case LABELS_NODE:
labelList = (List<String>) e.getValue();
break;
case DOCS_NODE:
docIdList = (List<String>) e.getValue();
break;
case SCORE_NODE:
score = (Double) e.getValue();
break;
case CLUSTERS_NODE:
subclusters = new ClusteringResponse((List<NamedList<Object>>) e.getValue()).getClusters();
break;
case IS_OTHER_TOPICS:
otherTopics = (Boolean) e.getValue();
break;
}
}
clusters.add(new Cluster(labelList, score, docIdList, subclusters, otherTopics));
} }
} }

View File

@ -58,6 +58,25 @@
<str>id2</str> <str>id2</str>
<str>id3</str> <str>id3</str>
</arr> </arr>
<arr name="clusters">
<lst>
<arr name="labels">
<str>label1.sub1</str>
</arr>
<arr name="docs">
<str>id1</str>
<str>id2</str>
</arr>
</lst>
<lst>
<arr name="labels">
<str>label1.sub2</str>
</arr>
<arr name="docs">
<str>id2</str>
</arr>
</lst>
</arr>
</lst> </lst>
<lst> <lst>
<arr name="labels"> <arr name="labels">

View File

@ -19,7 +19,7 @@ import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.io.Reader; import java.io.Reader;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.LinkedList; import java.util.Arrays;
import java.util.List; import java.util.List;
import org.apache.solr.SolrJettyTestBase; import org.apache.solr.SolrJettyTestBase;
@ -49,51 +49,21 @@ public class TestClusteringResponse extends SolrJettyTestBase {
List<Cluster> clusters = clusteringResponse.getClusters(); List<Cluster> clusters = clusteringResponse.getClusters();
Assert.assertEquals(4, clusters.size()); Assert.assertEquals(4, clusters.size());
//First Cluster checkCluster(clusters.get(0), Arrays.asList("label1"), Arrays.asList("id1", "id2", "id3"), 0.6d, false);
Cluster cluster1 = clusters.get(0); checkCluster(clusters.get(1), Arrays.asList("label2"), Arrays.asList("id5", "id6"), 0.93d, false);
List<String> expectedLabel1 = new LinkedList<String>(); checkCluster(clusters.get(2), Arrays.asList("label3"), Arrays.asList("id7", "id8"), 1.26d, false);
expectedLabel1.add("label1"); checkCluster(clusters.get(3), Arrays.asList("label4"), Arrays.asList("id9"), 0d, true);
List<String> expectedDocs1 = new LinkedList<String>();
expectedDocs1.add("id1");
expectedDocs1.add("id2");
expectedDocs1.add("id3");
Assert.assertEquals(expectedLabel1, cluster1.getLabels());
Assert.assertEquals(expectedDocs1, cluster1.getDocs());
Assert.assertEquals(expectedLabel1, cluster1.getLabels());
Assert.assertEquals(0.6, cluster1.getScore(), 0);
//Second Cluster
Cluster cluster2 = clusters.get(1);
List<String> expectedLabel2 = new LinkedList<String>();
expectedLabel2.add("label2");
List<String> expectedDocs2 = new LinkedList<String>();
expectedDocs2.add("id5");
expectedDocs2.add("id6");
Assert.assertEquals(expectedLabel2, cluster2.getLabels());
Assert.assertEquals(expectedDocs2, cluster2.getDocs());
Assert.assertEquals(expectedLabel2, cluster2.getLabels());
Assert.assertEquals(0.93, cluster2.getScore(), 0);
//Third Cluster
Cluster cluster3 = clusters.get(2);
List<String> expectedLabel3 = new LinkedList<String>();
expectedLabel3.add("label3");
List<String> expectedDocs3 = new LinkedList<String>();
expectedDocs3.add("id7");
expectedDocs3.add("id8");
Assert.assertEquals(expectedLabel3, cluster3.getLabels());
Assert.assertEquals(expectedDocs3, cluster3.getDocs());
Assert.assertEquals(expectedLabel3, cluster3.getLabels());
Assert.assertEquals(1.26, cluster3.getScore(), 0);
//Fourth Cluster
Cluster cluster4 = clusters.get(3);
List<String> expectedLabel4 = new LinkedList<String>();
expectedLabel4.add("label4");
List<String> expectedDocs4 = new LinkedList<String>();
expectedDocs4.add("id9");
Assert.assertEquals(expectedLabel4, cluster4.getLabels());
Assert.assertEquals(expectedDocs4, cluster4.getDocs());
Assert.assertEquals(expectedLabel4, cluster4.getLabels());
Assert.assertEquals(0.0, cluster4.getScore(), 0);
List<Cluster> sub = clusters.get(0).getSubclusters();
checkCluster(sub.get(0), Arrays.asList("label1.sub1"), Arrays.asList("id1", "id2"), 0.0d, false);
checkCluster(sub.get(1), Arrays.asList("label1.sub2"), Arrays.asList("id2"), 0.0d, false);
assertEquals(sub.size(), 2);
} }
private void checkCluster(Cluster cluster, List<String> labels, List<String> docRefs, double score, boolean otherTopics) {
Assert.assertEquals(cluster.getLabels(), labels);
Assert.assertEquals(cluster.getDocs(), docRefs);
Assert.assertTrue(Double.compare(cluster.getScore(), score) == 0);
Assert.assertEquals(otherTopics, cluster.isOtherTopics());
}
} }