mirror of https://github.com/apache/lucene.git
SOLR-10505: Add multi-field support to TermsComponent for terms stats
This commit is contained in:
parent
ffe61ff2ad
commit
19bcffa036
|
@ -175,6 +175,8 @@ New Features
|
||||||
Example: json.facet={x:"stddev(field1)", y:"variance(field2)"}
|
Example: json.facet={x:"stddev(field1)", y:"variance(field2)"}
|
||||||
(Rustam Hashimov, yonik)
|
(Rustam Hashimov, yonik)
|
||||||
|
|
||||||
|
* SOLR-10505: Add multi-field support to TermsComponent when requesting terms' statistics. (Shai Erera)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -101,8 +101,8 @@ public class TermsComponent extends SearchComponent {
|
||||||
|
|
||||||
boolean termStats = params.getBool(TermsParams.TERMS_STATS, false);
|
boolean termStats = params.getBool(TermsParams.TERMS_STATS, false);
|
||||||
|
|
||||||
if(termStats) {
|
if (termStats) {
|
||||||
NamedList<Number> stats = new SimpleOrderedMap();
|
NamedList<Number> stats = new SimpleOrderedMap<>();
|
||||||
rb.rsp.add("indexstats", stats);
|
rb.rsp.add("indexstats", stats);
|
||||||
collectStats(rb.req.getSearcher(), stats);
|
collectStats(rb.req.getSearcher(), stats);
|
||||||
}
|
}
|
||||||
|
@ -335,7 +335,7 @@ public class TermsComponent extends SearchComponent {
|
||||||
rb._termsHelper = null;
|
rb._termsHelper = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private ShardRequest createShardQuery(SolrParams params) {
|
private static ShardRequest createShardQuery(SolrParams params) {
|
||||||
ShardRequest sreq = new ShardRequest();
|
ShardRequest sreq = new ShardRequest();
|
||||||
sreq.purpose = ShardRequest.PURPOSE_GET_TERMS;
|
sreq.purpose = ShardRequest.PURPOSE_GET_TERMS;
|
||||||
|
|
||||||
|
@ -410,7 +410,7 @@ public class TermsComponent extends SearchComponent {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public NamedList buildResponse() {
|
public NamedList<Object> buildResponse() {
|
||||||
NamedList<Object> response = new SimpleOrderedMap<>();
|
NamedList<Object> response = new SimpleOrderedMap<>();
|
||||||
|
|
||||||
// determine if we are going index or count sort
|
// determine if we are going index or count sort
|
||||||
|
@ -480,7 +480,7 @@ public class TermsComponent extends SearchComponent {
|
||||||
}
|
}
|
||||||
|
|
||||||
// use <int> tags for smaller facet counts (better back compatibility)
|
// use <int> tags for smaller facet counts (better back compatibility)
|
||||||
private Number num(long val) {
|
private static Number num(long val) {
|
||||||
if (val < Integer.MAX_VALUE) return (int) val;
|
if (val < Integer.MAX_VALUE) return (int) val;
|
||||||
else return val;
|
else return val;
|
||||||
}
|
}
|
||||||
|
@ -515,53 +515,51 @@ public class TermsComponent extends SearchComponent {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void fetchTerms(SolrIndexSearcher indexSearcher,
|
private static void fetchTerms(SolrIndexSearcher indexSearcher, String[] fields, String termList,
|
||||||
String[] fields,
|
boolean includeTotalTermFreq, NamedList<Object> result) throws IOException {
|
||||||
String termList,
|
|
||||||
boolean includeTotalTermFreq,
|
|
||||||
NamedList result) throws IOException {
|
|
||||||
|
|
||||||
String field = fields[0];
|
|
||||||
FieldType fieldType = indexSearcher.getSchema().getField(field).getType();
|
|
||||||
String[] splitTerms = termList.split(",");
|
String[] splitTerms = termList.split(",");
|
||||||
|
for (int i = 0; i < splitTerms.length; i++) {
|
||||||
for(int i=0; i<splitTerms.length; i++) {
|
|
||||||
splitTerms[i] = splitTerms[i].trim();
|
splitTerms[i] = splitTerms[i].trim();
|
||||||
}
|
}
|
||||||
|
// Sort the terms once
|
||||||
Term[] terms = new Term[splitTerms.length];
|
Arrays.sort(splitTerms);
|
||||||
for(int i=0; i<splitTerms.length; i++) {
|
|
||||||
terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
|
|
||||||
}
|
|
||||||
|
|
||||||
Arrays.sort(terms);
|
|
||||||
|
|
||||||
IndexReaderContext topReaderContext = indexSearcher.getTopReaderContext();
|
IndexReaderContext topReaderContext = indexSearcher.getTopReaderContext();
|
||||||
TermContext[] termContexts = new TermContext[terms.length];
|
for (String field : fields) {
|
||||||
collectTermContext(topReaderContext, termContexts, terms);
|
FieldType fieldType = indexSearcher.getSchema().getField(field).getType();
|
||||||
|
|
||||||
NamedList termsMap = new SimpleOrderedMap();
|
// splitTerms is sorted as readable strings; NOTE(review): this array is also sorted only if readableToIndexed preserves that order - confirm for non-text field types
|
||||||
for (int i = 0; i < terms.length; i++) {
|
Term[] terms = new Term[splitTerms.length];
|
||||||
if (termContexts[i] != null) {
|
for (int i = 0; i < splitTerms.length; i++) {
|
||||||
String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString());
|
terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
|
||||||
int docFreq = termContexts[i].docFreq();
|
}
|
||||||
if (!includeTotalTermFreq) {
|
|
||||||
termsMap.add(outTerm, docFreq);
|
TermContext[] termContexts = new TermContext[terms.length];
|
||||||
} else {
|
collectTermContext(topReaderContext, termContexts, terms);
|
||||||
long totalTermFreq = termContexts[i].totalTermFreq();
|
|
||||||
NamedList<Long> termStats = new SimpleOrderedMap<>();
|
NamedList<Object> termsMap = new SimpleOrderedMap<>();
|
||||||
termStats.add("docFreq", (long) docFreq);
|
for (int i = 0; i < terms.length; i++) {
|
||||||
termStats.add("totalTermFreq", totalTermFreq);
|
if (termContexts[i] != null) {
|
||||||
termsMap.add(outTerm, termStats);
|
String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString());
|
||||||
|
int docFreq = termContexts[i].docFreq();
|
||||||
|
if (!includeTotalTermFreq) {
|
||||||
|
termsMap.add(outTerm, docFreq);
|
||||||
|
} else {
|
||||||
|
long totalTermFreq = termContexts[i].totalTermFreq();
|
||||||
|
NamedList<Long> termStats = new SimpleOrderedMap<>();
|
||||||
|
termStats.add("docFreq", (long) docFreq);
|
||||||
|
termStats.add("totalTermFreq", totalTermFreq);
|
||||||
|
termsMap.add(outTerm, termStats);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
result.add(field, termsMap);
|
result.add(field, termsMap);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void collectTermContext(IndexReaderContext topReaderContext, TermContext[] contextArray, Term[] queryTerms)
|
private static void collectTermContext(IndexReaderContext topReaderContext, TermContext[] contextArray,
|
||||||
throws IOException {
|
Term[] queryTerms) throws IOException {
|
||||||
TermsEnum termsEnum = null;
|
TermsEnum termsEnum = null;
|
||||||
for (LeafReaderContext context : topReaderContext.leaves()) {
|
for (LeafReaderContext context : topReaderContext.leaves()) {
|
||||||
final Fields fields = context.reader().fields();
|
final Fields fields = context.reader().fields();
|
||||||
|
@ -589,7 +587,7 @@ public class TermsComponent extends SearchComponent {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void collectStats(SolrIndexSearcher searcher, NamedList<Number> stats) {
|
private static void collectStats(SolrIndexSearcher searcher, NamedList<Number> stats) {
|
||||||
int numDocs = searcher.getTopReaderContext().reader().numDocs();
|
int numDocs = searcher.getTopReaderContext().reader().numDocs();
|
||||||
stats.add("numDocs", Long.valueOf(numDocs));
|
stats.add("numDocs", Long.valueOf(numDocs));
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,13 +34,14 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
|
||||||
index(id, 19, "b_t", "snake spider shark snail slug", "foo_i", "2");
|
index(id, 19, "b_t", "snake spider shark snail slug", "foo_i", "2");
|
||||||
index(id, 20, "b_t", "snake spider shark snail", "foo_i", "3");
|
index(id, 20, "b_t", "snake spider shark snail", "foo_i", "3");
|
||||||
index(id, 21, "b_t", "snake spider shark", "foo_i", "2");
|
index(id, 21, "b_t", "snake spider shark", "foo_i", "2");
|
||||||
index(id, 22, "b_t", "snake spider");
|
index(id, 22, "b_t", "snake spider", "c_t", "snake spider");
|
||||||
index(id, 23, "b_t", "snake");
|
index(id, 23, "b_t", "snake", "c_t", "snake");
|
||||||
index(id, 24, "b_t", "ant zebra");
|
index(id, 24, "b_t", "ant zebra", "c_t", "ant zebra");
|
||||||
index(id, 25, "b_t", "zebra");
|
index(id, 25, "b_t", "zebra", "c_t", "zebra");
|
||||||
commit();
|
commit();
|
||||||
|
|
||||||
handle.clear();
|
handle.clear();
|
||||||
|
handle.put("terms", UNORDERED);
|
||||||
|
|
||||||
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t");
|
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t");
|
||||||
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.lower", "s");
|
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.lower", "s");
|
||||||
|
@ -53,5 +54,6 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
|
||||||
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.list", "2, 3, 1");
|
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.list", "2, 3, 1");
|
||||||
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2, 3, 1");
|
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2, 3, 1");
|
||||||
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake, zebra", "terms.ttf", "true");
|
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake, zebra", "terms.ttf", "true");
|
||||||
|
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.fl", "c_t", "terms.list", "snake, ant, zebra", "terms.ttf", "true");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -351,4 +351,31 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
|
||||||
"//lst[@name='standardfilt']/lst[@name='snake']/long[@name='totalTermFreq'][.='3']");
|
"//lst[@name='standardfilt']/lst[@name='snake']/long[@name='totalTermFreq'][.='3']");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDocFreqAndTotalTermFreqForMultipleFields() throws Exception {
|
||||||
|
SolrQueryRequest req = req(
|
||||||
|
"indent","true",
|
||||||
|
"qt", "/terms",
|
||||||
|
"terms", "true",
|
||||||
|
"terms.fl", "lowerfilt",
|
||||||
|
"terms.fl", "standardfilt",
|
||||||
|
"terms.ttf", "true",
|
||||||
|
"terms.list", "a,aa,aaa");
|
||||||
|
assertQ(req,
|
||||||
|
"count(//lst[@name='lowerfilt']/*)=3",
|
||||||
|
"count(//lst[@name='standardfilt']/*)=3",
|
||||||
|
"//lst[@name='lowerfilt']/lst[@name='a']/long[@name='docFreq'][.='2']",
|
||||||
|
"//lst[@name='lowerfilt']/lst[@name='a']/long[@name='totalTermFreq'][.='2']",
|
||||||
|
"//lst[@name='lowerfilt']/lst[@name='aa']/long[@name='docFreq'][.='1']",
|
||||||
|
"//lst[@name='lowerfilt']/lst[@name='aa']/long[@name='totalTermFreq'][.='1']",
|
||||||
|
"//lst[@name='lowerfilt']/lst[@name='aaa']/long[@name='docFreq'][.='1']",
|
||||||
|
"//lst[@name='lowerfilt']/lst[@name='aaa']/long[@name='totalTermFreq'][.='1']",
|
||||||
|
"//lst[@name='standardfilt']/lst[@name='a']/long[@name='docFreq'][.='1']",
|
||||||
|
"//lst[@name='standardfilt']/lst[@name='a']/long[@name='totalTermFreq'][.='1']",
|
||||||
|
"//lst[@name='standardfilt']/lst[@name='aa']/long[@name='docFreq'][.='1']",
|
||||||
|
"//lst[@name='standardfilt']/lst[@name='aa']/long[@name='totalTermFreq'][.='1']",
|
||||||
|
"//lst[@name='standardfilt']/lst[@name='aaa']/long[@name='docFreq'][.='1']",
|
||||||
|
"//lst[@name='standardfilt']/lst[@name='aaa']/long[@name='totalTermFreq'][.='1']");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue