mirror of https://github.com/apache/lucene.git
SOLR-9293: Solrj client support for hierarchical clusters and other topics marker.
This commit is contained in:
parent
cc99815dcb
commit
7fb72bfe10
|
@ -81,6 +81,9 @@ Detailed Change List
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
----------------------
|
----------------------
|
||||||
|
* SOLR-9293: SolrJ client support for hierarchical clusters and the "other topics"
|
||||||
|
marker. (Dawid Weiss)
|
||||||
|
|
||||||
* SOLR-9681: FacetModule / JSON Facet API added the ability to add filters directly to
|
* SOLR-9681: FacetModule / JSON Facet API added the ability to add filters directly to
|
||||||
any facet command. The filters are applied after any domain change operations.
|
any facet command. The filters are applied after any domain change operations.
|
||||||
Example: { type:terms, field:category, filter:"user:yonik" }
|
Example: { type:terms, field:category, filter:"user:yonik" }
|
||||||
|
|
|
@ -16,7 +16,9 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.client.solrj.response;
|
package org.apache.solr.client.solrj.response;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class represents a cluster of Solr Docs.
|
* This class represents a cluster of Solr Docs.
|
||||||
|
@ -28,41 +30,43 @@ public class Cluster {
|
||||||
private List<String> labels;
|
private List<String> labels;
|
||||||
private double score;
|
private double score;
|
||||||
private List<String> docIds;
|
private List<String> docIds;
|
||||||
|
private List<Cluster> subclusters;
|
||||||
|
private boolean otherTopics;
|
||||||
|
|
||||||
|
public Cluster(List<String> labels, double score, List<String> docIds) {
|
||||||
|
this(labels, score, docIds, Collections.emptyList(), false);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param labels the list of human-readable labels associated with the cluster
|
* @param labels the list of human-readable labels associated with the cluster
|
||||||
* @param score the score produced by the clustering algorithm for the current cluster
|
* @param score the score produced by the clustering algorithm for the current cluster
|
||||||
* @param docIds the list of document Ids belonging to the cluster
|
* @param docIds the list of document Ids belonging to the cluster
|
||||||
*/
|
*/
|
||||||
public Cluster(List<String> labels, double score, List<String> docIds) {
|
public Cluster(List<String> labels, double score, List<String> docIds, List<Cluster> subclusters, boolean otherTopics) {
|
||||||
this.labels = labels;
|
this.labels = labels;
|
||||||
this.score = score;
|
this.score = score;
|
||||||
this.docIds = docIds;
|
this.docIds = docIds;
|
||||||
|
this.subclusters = subclusters;
|
||||||
|
this.otherTopics = otherTopics;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object o) {
|
public boolean equals(Object o) {
|
||||||
if (this == o) return true;
|
return o != null &&
|
||||||
if (!(o instanceof Cluster)) return false;
|
this.getClass().isInstance(o) &&
|
||||||
|
equalsTo((Cluster) o);
|
||||||
|
}
|
||||||
|
|
||||||
Cluster cluster = (Cluster) o;
|
private boolean equalsTo(Cluster o) {
|
||||||
|
return Double.compare(o.score, score) == 0 &&
|
||||||
if (Double.compare(cluster.score, score) != 0) return false;
|
Objects.equals(o.docIds, docIds) &&
|
||||||
if (!docIds.equals(cluster.docIds)) return false;
|
Objects.equals(o.labels, labels) &&
|
||||||
if (!labels.equals(cluster.labels)) return false;
|
Objects.equals(o.subclusters, subclusters);
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
int result;
|
return Objects.hash(subclusters, docIds, labels, score);
|
||||||
long temp;
|
|
||||||
result = labels.hashCode();
|
|
||||||
temp = Double.doubleToLongBits(score);
|
|
||||||
result = 31 * result + (int) (temp ^ (temp >>> 32));
|
|
||||||
result = 31 * result + docIds.hashCode();
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getLabels() {
|
public List<String> getLabels() {
|
||||||
|
@ -89,5 +93,15 @@ public class Cluster {
|
||||||
this.docIds = docIds;
|
this.docIds = docIds;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<Cluster> getSubclusters() {
|
||||||
|
return subclusters;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return If <code>true</code>, the cluster contains references to documents that are not semantically associated
|
||||||
|
* and form a group of documents not related to any other cluster (or themselves).
|
||||||
|
*/
|
||||||
|
public boolean isOtherTopics() {
|
||||||
|
return otherTopics;
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -15,8 +15,10 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.client.solrj.response;
|
package org.apache.solr.client.solrj.response;
|
||||||
import java.util.LinkedList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
|
||||||
|
@ -24,21 +26,47 @@ import org.apache.solr.common.util.NamedList;
|
||||||
* Encapsulates responses from ClusteringComponent
|
* Encapsulates responses from ClusteringComponent
|
||||||
*/
|
*/
|
||||||
public class ClusteringResponse {
|
public class ClusteringResponse {
|
||||||
|
private static final String CLUSTERS_NODE = "clusters";
|
||||||
private static final String LABELS_NODE = "labels";
|
private static final String LABELS_NODE = "labels";
|
||||||
private static final String DOCS_NODE = "docs";
|
private static final String DOCS_NODE = "docs";
|
||||||
private static final String SCORE_NODE = "score";
|
private static final String SCORE_NODE = "score";
|
||||||
private List<Cluster> clusters = new LinkedList<Cluster>();
|
private static final String IS_OTHER_TOPICS = "other-topics";
|
||||||
|
private List<Cluster> clusters;
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
public ClusteringResponse(List<NamedList<Object>> clusterInfo) {
|
public ClusteringResponse(List<NamedList<Object>> clusterInfo) {
|
||||||
|
clusters = new ArrayList<Cluster>();
|
||||||
for (NamedList<Object> clusterNode : clusterInfo) {
|
for (NamedList<Object> clusterNode : clusterInfo) {
|
||||||
List<String> labelList;
|
List<String> labelList, docIdList;
|
||||||
List<String> docIdList;
|
List<Cluster> subclusters = Collections.emptyList();
|
||||||
labelList = (List<String>) clusterNode.get(LABELS_NODE);
|
labelList = docIdList = Collections.emptyList();
|
||||||
double score = (double) clusterNode.get(SCORE_NODE);
|
Double score = 0d;
|
||||||
docIdList = (List<String>) clusterNode.get(DOCS_NODE);
|
boolean otherTopics = false;
|
||||||
Cluster currentCluster = new Cluster(labelList, score, docIdList);
|
for (Map.Entry<String, ?> e : clusterNode) {
|
||||||
clusters.add(currentCluster);
|
switch (e.getKey()) {
|
||||||
|
case LABELS_NODE:
|
||||||
|
labelList = (List<String>) e.getValue();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case DOCS_NODE:
|
||||||
|
docIdList = (List<String>) e.getValue();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SCORE_NODE:
|
||||||
|
score = (Double) e.getValue();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CLUSTERS_NODE:
|
||||||
|
subclusters = new ClusteringResponse((List<NamedList<Object>>) e.getValue()).getClusters();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case IS_OTHER_TOPICS:
|
||||||
|
otherTopics = (Boolean) e.getValue();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
clusters.add(new Cluster(labelList, score, docIdList, subclusters, otherTopics));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -58,6 +58,25 @@
|
||||||
<str>id2</str>
|
<str>id2</str>
|
||||||
<str>id3</str>
|
<str>id3</str>
|
||||||
</arr>
|
</arr>
|
||||||
|
<arr name="clusters">
|
||||||
|
<lst>
|
||||||
|
<arr name="labels">
|
||||||
|
<str>label1.sub1</str>
|
||||||
|
</arr>
|
||||||
|
<arr name="docs">
|
||||||
|
<str>id1</str>
|
||||||
|
<str>id2</str>
|
||||||
|
</arr>
|
||||||
|
</lst>
|
||||||
|
<lst>
|
||||||
|
<arr name="labels">
|
||||||
|
<str>label1.sub2</str>
|
||||||
|
</arr>
|
||||||
|
<arr name="docs">
|
||||||
|
<str>id2</str>
|
||||||
|
</arr>
|
||||||
|
</lst>
|
||||||
|
</arr>
|
||||||
</lst>
|
</lst>
|
||||||
<lst>
|
<lst>
|
||||||
<arr name="labels">
|
<arr name="labels">
|
||||||
|
|
|
@ -19,7 +19,7 @@ import java.io.InputStream;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.LinkedList;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.solr.SolrJettyTestBase;
|
import org.apache.solr.SolrJettyTestBase;
|
||||||
|
@ -49,51 +49,21 @@ public class TestClusteringResponse extends SolrJettyTestBase {
|
||||||
List<Cluster> clusters = clusteringResponse.getClusters();
|
List<Cluster> clusters = clusteringResponse.getClusters();
|
||||||
Assert.assertEquals(4, clusters.size());
|
Assert.assertEquals(4, clusters.size());
|
||||||
|
|
||||||
//First Cluster
|
checkCluster(clusters.get(0), Arrays.asList("label1"), Arrays.asList("id1", "id2", "id3"), 0.6d, false);
|
||||||
Cluster cluster1 = clusters.get(0);
|
checkCluster(clusters.get(1), Arrays.asList("label2"), Arrays.asList("id5", "id6"), 0.93d, false);
|
||||||
List<String> expectedLabel1 = new LinkedList<String>();
|
checkCluster(clusters.get(2), Arrays.asList("label3"), Arrays.asList("id7", "id8"), 1.26d, false);
|
||||||
expectedLabel1.add("label1");
|
checkCluster(clusters.get(3), Arrays.asList("label4"), Arrays.asList("id9"), 0d, true);
|
||||||
List<String> expectedDocs1 = new LinkedList<String>();
|
|
||||||
expectedDocs1.add("id1");
|
|
||||||
expectedDocs1.add("id2");
|
|
||||||
expectedDocs1.add("id3");
|
|
||||||
Assert.assertEquals(expectedLabel1, cluster1.getLabels());
|
|
||||||
Assert.assertEquals(expectedDocs1, cluster1.getDocs());
|
|
||||||
Assert.assertEquals(expectedLabel1, cluster1.getLabels());
|
|
||||||
Assert.assertEquals(0.6, cluster1.getScore(), 0);
|
|
||||||
//Second Cluster
|
|
||||||
Cluster cluster2 = clusters.get(1);
|
|
||||||
List<String> expectedLabel2 = new LinkedList<String>();
|
|
||||||
expectedLabel2.add("label2");
|
|
||||||
List<String> expectedDocs2 = new LinkedList<String>();
|
|
||||||
expectedDocs2.add("id5");
|
|
||||||
expectedDocs2.add("id6");
|
|
||||||
Assert.assertEquals(expectedLabel2, cluster2.getLabels());
|
|
||||||
Assert.assertEquals(expectedDocs2, cluster2.getDocs());
|
|
||||||
Assert.assertEquals(expectedLabel2, cluster2.getLabels());
|
|
||||||
Assert.assertEquals(0.93, cluster2.getScore(), 0);
|
|
||||||
//Third Cluster
|
|
||||||
Cluster cluster3 = clusters.get(2);
|
|
||||||
List<String> expectedLabel3 = new LinkedList<String>();
|
|
||||||
expectedLabel3.add("label3");
|
|
||||||
List<String> expectedDocs3 = new LinkedList<String>();
|
|
||||||
expectedDocs3.add("id7");
|
|
||||||
expectedDocs3.add("id8");
|
|
||||||
Assert.assertEquals(expectedLabel3, cluster3.getLabels());
|
|
||||||
Assert.assertEquals(expectedDocs3, cluster3.getDocs());
|
|
||||||
Assert.assertEquals(expectedLabel3, cluster3.getLabels());
|
|
||||||
Assert.assertEquals(1.26, cluster3.getScore(), 0);
|
|
||||||
//Fourth Cluster
|
|
||||||
Cluster cluster4 = clusters.get(3);
|
|
||||||
List<String> expectedLabel4 = new LinkedList<String>();
|
|
||||||
expectedLabel4.add("label4");
|
|
||||||
List<String> expectedDocs4 = new LinkedList<String>();
|
|
||||||
expectedDocs4.add("id9");
|
|
||||||
Assert.assertEquals(expectedLabel4, cluster4.getLabels());
|
|
||||||
Assert.assertEquals(expectedDocs4, cluster4.getDocs());
|
|
||||||
Assert.assertEquals(expectedLabel4, cluster4.getLabels());
|
|
||||||
Assert.assertEquals(0.0, cluster4.getScore(), 0);
|
|
||||||
|
|
||||||
|
List<Cluster> sub = clusters.get(0).getSubclusters();
|
||||||
|
checkCluster(sub.get(0), Arrays.asList("label1.sub1"), Arrays.asList("id1", "id2"), 0.0d, false);
|
||||||
|
checkCluster(sub.get(1), Arrays.asList("label1.sub2"), Arrays.asList("id2"), 0.0d, false);
|
||||||
|
assertEquals(sub.size(), 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void checkCluster(Cluster cluster, List<String> labels, List<String> docRefs, double score, boolean otherTopics) {
|
||||||
|
Assert.assertEquals(cluster.getLabels(), labels);
|
||||||
|
Assert.assertEquals(cluster.getDocs(), docRefs);
|
||||||
|
Assert.assertTrue(Double.compare(cluster.getScore(), score) == 0);
|
||||||
|
Assert.assertEquals(otherTopics, cluster.isOtherTopics());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue