mirror of https://github.com/apache/lucene.git
SOLR-9293: Solrj client support for hierarchical clusters and other topics marker.
This commit is contained in:
parent
cc99815dcb
commit
7fb72bfe10
|
@ -81,6 +81,9 @@ Detailed Change List
|
|||
|
||||
New Features
|
||||
----------------------
|
||||
* SOLR-9293: Solrj client support for hierarchical clusters and other topics
|
||||
marker. (Dawid Weiss)
|
||||
|
||||
* SOLR-9681: FacetModule / JSON Facet API added the ability to add filters directly to
|
||||
any facet command. The filters are applied after any domain change operations.
|
||||
Example: { type:terms, field:category, filter:"user:yonik" }
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
*/
|
||||
package org.apache.solr.client.solrj.response;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* This class represents a cluster of Solr Docs .
|
||||
|
@ -28,41 +30,43 @@ public class Cluster {
|
|||
private List<String> labels;
|
||||
private double score;
|
||||
private List<String> docIds;
|
||||
private List<Cluster> subclusters;
|
||||
private boolean otherTopics;
|
||||
|
||||
public Cluster(List<String> labels, double score, List<String> docIds) {
|
||||
this(labels, score, docIds, Collections.emptyList(), false);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param labels the list of human readable labels associated to the cluster
|
||||
* @param score the score produced by the clustering algorithm for the current cluster
|
||||
* @param docIds the list of document Ids belonging to the cluster
|
||||
*/
|
||||
public Cluster(List<String> labels, double score, List<String> docIds) {
|
||||
public Cluster(List<String> labels, double score, List<String> docIds, List<Cluster> subclusters, boolean otherTopics) {
|
||||
this.labels = labels;
|
||||
this.score = score;
|
||||
this.docIds = docIds;
|
||||
this.subclusters = subclusters;
|
||||
this.otherTopics = otherTopics;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (!(o instanceof Cluster)) return false;
|
||||
return o != null &&
|
||||
this.getClass().isInstance(o) &&
|
||||
equalsTo((Cluster) o);
|
||||
}
|
||||
|
||||
Cluster cluster = (Cluster) o;
|
||||
|
||||
if (Double.compare(cluster.score, score) != 0) return false;
|
||||
if (!docIds.equals(cluster.docIds)) return false;
|
||||
if (!labels.equals(cluster.labels)) return false;
|
||||
|
||||
return true;
|
||||
private boolean equalsTo(Cluster o) {
|
||||
return Double.compare(o.score, score) == 0 &&
|
||||
Objects.equals(o.docIds, docIds) &&
|
||||
Objects.equals(o.labels, labels) &&
|
||||
Objects.equals(o.subclusters, subclusters);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result;
|
||||
long temp;
|
||||
result = labels.hashCode();
|
||||
temp = Double.doubleToLongBits(score);
|
||||
result = 31 * result + (int) (temp ^ (temp >>> 32));
|
||||
result = 31 * result + docIds.hashCode();
|
||||
return result;
|
||||
return Objects.hash(subclusters, docIds, labels, score);
|
||||
}
|
||||
|
||||
public List<String> getLabels() {
|
||||
|
@ -89,5 +93,15 @@ public class Cluster {
|
|||
this.docIds = docIds;
|
||||
}
|
||||
|
||||
public List<Cluster> getSubclusters() {
|
||||
return subclusters;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return If <code>true</code>, the cluster contains references to documents that are not semantically associated
|
||||
* and form a group of documents not related to any other cluster (or themselves).
|
||||
*/
|
||||
public boolean isOtherTopics() {
|
||||
return otherTopics;
|
||||
}
|
||||
}
|
|
@ -15,8 +15,10 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.client.solrj.response;
|
||||
import java.util.LinkedList;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
|
@ -24,21 +26,47 @@ import org.apache.solr.common.util.NamedList;
|
|||
* Encapsulates responses from ClusteringComponent
|
||||
*/
|
||||
public class ClusteringResponse {
|
||||
|
||||
private static final String CLUSTERS_NODE = "clusters";
|
||||
private static final String LABELS_NODE = "labels";
|
||||
private static final String DOCS_NODE = "docs";
|
||||
private static final String SCORE_NODE = "score";
|
||||
private List<Cluster> clusters = new LinkedList<Cluster>();
|
||||
private static final String IS_OTHER_TOPICS = "other-topics";
|
||||
private List<Cluster> clusters;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public ClusteringResponse(List<NamedList<Object>> clusterInfo) {
|
||||
clusters = new ArrayList<Cluster>();
|
||||
for (NamedList<Object> clusterNode : clusterInfo) {
|
||||
List<String> labelList;
|
||||
List<String> docIdList;
|
||||
labelList = (List<String>) clusterNode.get(LABELS_NODE);
|
||||
double score = (double) clusterNode.get(SCORE_NODE);
|
||||
docIdList = (List<String>) clusterNode.get(DOCS_NODE);
|
||||
Cluster currentCluster = new Cluster(labelList, score, docIdList);
|
||||
clusters.add(currentCluster);
|
||||
List<String> labelList, docIdList;
|
||||
List<Cluster> subclusters = Collections.emptyList();
|
||||
labelList = docIdList = Collections.emptyList();
|
||||
Double score = 0d;
|
||||
boolean otherTopics = false;
|
||||
for (Map.Entry<String, ?> e : clusterNode) {
|
||||
switch (e.getKey()) {
|
||||
case LABELS_NODE:
|
||||
labelList = (List<String>) e.getValue();
|
||||
break;
|
||||
|
||||
case DOCS_NODE:
|
||||
docIdList = (List<String>) e.getValue();
|
||||
break;
|
||||
|
||||
case SCORE_NODE:
|
||||
score = (Double) e.getValue();
|
||||
break;
|
||||
|
||||
case CLUSTERS_NODE:
|
||||
subclusters = new ClusteringResponse((List<NamedList<Object>>) e.getValue()).getClusters();
|
||||
break;
|
||||
|
||||
case IS_OTHER_TOPICS:
|
||||
otherTopics = (Boolean) e.getValue();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
clusters.add(new Cluster(labelList, score, docIdList, subclusters, otherTopics));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -58,6 +58,25 @@
|
|||
<str>id2</str>
|
||||
<str>id3</str>
|
||||
</arr>
|
||||
<arr name="clusters">
|
||||
<lst>
|
||||
<arr name="labels">
|
||||
<str>label1.sub1</str>
|
||||
</arr>
|
||||
<arr name="docs">
|
||||
<str>id1</str>
|
||||
<str>id2</str>
|
||||
</arr>
|
||||
</lst>
|
||||
<lst>
|
||||
<arr name="labels">
|
||||
<str>label1.sub2</str>
|
||||
</arr>
|
||||
<arr name="docs">
|
||||
<str>id2</str>
|
||||
</arr>
|
||||
</lst>
|
||||
</arr>
|
||||
</lst>
|
||||
<lst>
|
||||
<arr name="labels">
|
||||
|
|
|
@ -19,7 +19,7 @@ import java.io.InputStream;
|
|||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.solr.SolrJettyTestBase;
|
||||
|
@ -49,51 +49,21 @@ public class TestClusteringResponse extends SolrJettyTestBase {
|
|||
List<Cluster> clusters = clusteringResponse.getClusters();
|
||||
Assert.assertEquals(4, clusters.size());
|
||||
|
||||
//First Cluster
|
||||
Cluster cluster1 = clusters.get(0);
|
||||
List<String> expectedLabel1 = new LinkedList<String>();
|
||||
expectedLabel1.add("label1");
|
||||
List<String> expectedDocs1 = new LinkedList<String>();
|
||||
expectedDocs1.add("id1");
|
||||
expectedDocs1.add("id2");
|
||||
expectedDocs1.add("id3");
|
||||
Assert.assertEquals(expectedLabel1, cluster1.getLabels());
|
||||
Assert.assertEquals(expectedDocs1, cluster1.getDocs());
|
||||
Assert.assertEquals(expectedLabel1, cluster1.getLabels());
|
||||
Assert.assertEquals(0.6, cluster1.getScore(), 0);
|
||||
//Second Cluster
|
||||
Cluster cluster2 = clusters.get(1);
|
||||
List<String> expectedLabel2 = new LinkedList<String>();
|
||||
expectedLabel2.add("label2");
|
||||
List<String> expectedDocs2 = new LinkedList<String>();
|
||||
expectedDocs2.add("id5");
|
||||
expectedDocs2.add("id6");
|
||||
Assert.assertEquals(expectedLabel2, cluster2.getLabels());
|
||||
Assert.assertEquals(expectedDocs2, cluster2.getDocs());
|
||||
Assert.assertEquals(expectedLabel2, cluster2.getLabels());
|
||||
Assert.assertEquals(0.93, cluster2.getScore(), 0);
|
||||
//Third Cluster
|
||||
Cluster cluster3 = clusters.get(2);
|
||||
List<String> expectedLabel3 = new LinkedList<String>();
|
||||
expectedLabel3.add("label3");
|
||||
List<String> expectedDocs3 = new LinkedList<String>();
|
||||
expectedDocs3.add("id7");
|
||||
expectedDocs3.add("id8");
|
||||
Assert.assertEquals(expectedLabel3, cluster3.getLabels());
|
||||
Assert.assertEquals(expectedDocs3, cluster3.getDocs());
|
||||
Assert.assertEquals(expectedLabel3, cluster3.getLabels());
|
||||
Assert.assertEquals(1.26, cluster3.getScore(), 0);
|
||||
//Fourth Cluster
|
||||
Cluster cluster4 = clusters.get(3);
|
||||
List<String> expectedLabel4 = new LinkedList<String>();
|
||||
expectedLabel4.add("label4");
|
||||
List<String> expectedDocs4 = new LinkedList<String>();
|
||||
expectedDocs4.add("id9");
|
||||
Assert.assertEquals(expectedLabel4, cluster4.getLabels());
|
||||
Assert.assertEquals(expectedDocs4, cluster4.getDocs());
|
||||
Assert.assertEquals(expectedLabel4, cluster4.getLabels());
|
||||
Assert.assertEquals(0.0, cluster4.getScore(), 0);
|
||||
|
||||
checkCluster(clusters.get(0), Arrays.asList("label1"), Arrays.asList("id1", "id2", "id3"), 0.6d, false);
|
||||
checkCluster(clusters.get(1), Arrays.asList("label2"), Arrays.asList("id5", "id6"), 0.93d, false);
|
||||
checkCluster(clusters.get(2), Arrays.asList("label3"), Arrays.asList("id7", "id8"), 1.26d, false);
|
||||
checkCluster(clusters.get(3), Arrays.asList("label4"), Arrays.asList("id9"), 0d, true);
|
||||
|
||||
List<Cluster> sub = clusters.get(0).getSubclusters();
|
||||
checkCluster(sub.get(0), Arrays.asList("label1.sub1"), Arrays.asList("id1", "id2"), 0.0d, false);
|
||||
checkCluster(sub.get(1), Arrays.asList("label1.sub2"), Arrays.asList("id2"), 0.0d, false);
|
||||
assertEquals(sub.size(), 2);
|
||||
}
|
||||
|
||||
private void checkCluster(Cluster cluster, List<String> labels, List<String> docRefs, double score, boolean otherTopics) {
|
||||
Assert.assertEquals(cluster.getLabels(), labels);
|
||||
Assert.assertEquals(cluster.getDocs(), docRefs);
|
||||
Assert.assertTrue(Double.compare(cluster.getScore(), score) == 0);
|
||||
Assert.assertEquals(otherTopics, cluster.isOtherTopics());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue