SOLR-10505: Add multi-field support to TermsComponent for terms stats

This commit is contained in:
Shai Erera 2017-04-18 06:33:18 +03:00
parent ffe61ff2ad
commit 19bcffa036
4 changed files with 75 additions and 46 deletions

View File

@ -175,6 +175,8 @@ New Features
Example: json.facet={x:"stddev(field1)", y:"variance(field2)"} Example: json.facet={x:"stddev(field1)", y:"variance(field2)"}
(Rustam Hashimov, yonik) (Rustam Hashimov, yonik)
* SOLR-10505: Add multi-field support to TermsComponent when requesting terms' statistics. (Shai Erera)
Optimizations Optimizations
---------------------- ----------------------

View File

@ -102,7 +102,7 @@ public class TermsComponent extends SearchComponent {
boolean termStats = params.getBool(TermsParams.TERMS_STATS, false); boolean termStats = params.getBool(TermsParams.TERMS_STATS, false);
if (termStats) { if (termStats) {
NamedList<Number> stats = new SimpleOrderedMap(); NamedList<Number> stats = new SimpleOrderedMap<>();
rb.rsp.add("indexstats", stats); rb.rsp.add("indexstats", stats);
collectStats(rb.req.getSearcher(), stats); collectStats(rb.req.getSearcher(), stats);
} }
@ -335,7 +335,7 @@ public class TermsComponent extends SearchComponent {
rb._termsHelper = null; rb._termsHelper = null;
} }
private ShardRequest createShardQuery(SolrParams params) { private static ShardRequest createShardQuery(SolrParams params) {
ShardRequest sreq = new ShardRequest(); ShardRequest sreq = new ShardRequest();
sreq.purpose = ShardRequest.PURPOSE_GET_TERMS; sreq.purpose = ShardRequest.PURPOSE_GET_TERMS;
@ -410,7 +410,7 @@ public class TermsComponent extends SearchComponent {
} }
} }
public NamedList buildResponse() { public NamedList<Object> buildResponse() {
NamedList<Object> response = new SimpleOrderedMap<>(); NamedList<Object> response = new SimpleOrderedMap<>();
// determine if we are going index or count sort // determine if we are going index or count sort
@ -480,7 +480,7 @@ public class TermsComponent extends SearchComponent {
} }
// use <int> tags for smaller facet counts (better back compatibility) // use <int> tags for smaller facet counts (better back compatibility)
private Number num(long val) { private static Number num(long val) {
if (val < Integer.MAX_VALUE) return (int) val; if (val < Integer.MAX_VALUE) return (int) val;
else return val; else return val;
} }
@ -515,32 +515,29 @@ public class TermsComponent extends SearchComponent {
} }
} }
private void fetchTerms(SolrIndexSearcher indexSearcher, private static void fetchTerms(SolrIndexSearcher indexSearcher, String[] fields, String termList,
String[] fields, boolean includeTotalTermFreq, NamedList<Object> result) throws IOException {
String termList,
boolean includeTotalTermFreq,
NamedList result) throws IOException {
String field = fields[0];
FieldType fieldType = indexSearcher.getSchema().getField(field).getType();
String[] splitTerms = termList.split(","); String[] splitTerms = termList.split(",");
for (int i = 0; i < splitTerms.length; i++) { for (int i = 0; i < splitTerms.length; i++) {
splitTerms[i] = splitTerms[i].trim(); splitTerms[i] = splitTerms[i].trim();
} }
// Sort the terms once
Arrays.sort(splitTerms);
IndexReaderContext topReaderContext = indexSearcher.getTopReaderContext();
for (String field : fields) {
FieldType fieldType = indexSearcher.getSchema().getField(field).getType();
// Since splitTerms is already sorted, this array will also be sorted
Term[] terms = new Term[splitTerms.length]; Term[] terms = new Term[splitTerms.length];
for (int i = 0; i < splitTerms.length; i++) { for (int i = 0; i < splitTerms.length; i++) {
terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i])); terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
} }
Arrays.sort(terms);
IndexReaderContext topReaderContext = indexSearcher.getTopReaderContext();
TermContext[] termContexts = new TermContext[terms.length]; TermContext[] termContexts = new TermContext[terms.length];
collectTermContext(topReaderContext, termContexts, terms); collectTermContext(topReaderContext, termContexts, terms);
NamedList termsMap = new SimpleOrderedMap(); NamedList<Object> termsMap = new SimpleOrderedMap<>();
for (int i = 0; i < terms.length; i++) { for (int i = 0; i < terms.length; i++) {
if (termContexts[i] != null) { if (termContexts[i] != null) {
String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString()); String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString());
@ -559,9 +556,10 @@ public class TermsComponent extends SearchComponent {
result.add(field, termsMap); result.add(field, termsMap);
} }
}
private void collectTermContext(IndexReaderContext topReaderContext, TermContext[] contextArray, Term[] queryTerms) private static void collectTermContext(IndexReaderContext topReaderContext, TermContext[] contextArray,
throws IOException { Term[] queryTerms) throws IOException {
TermsEnum termsEnum = null; TermsEnum termsEnum = null;
for (LeafReaderContext context : topReaderContext.leaves()) { for (LeafReaderContext context : topReaderContext.leaves()) {
final Fields fields = context.reader().fields(); final Fields fields = context.reader().fields();
@ -589,7 +587,7 @@ public class TermsComponent extends SearchComponent {
} }
} }
private void collectStats(SolrIndexSearcher searcher, NamedList<Number> stats) { private static void collectStats(SolrIndexSearcher searcher, NamedList<Number> stats) {
int numDocs = searcher.getTopReaderContext().reader().numDocs(); int numDocs = searcher.getTopReaderContext().reader().numDocs();
stats.add("numDocs", Long.valueOf(numDocs)); stats.add("numDocs", Long.valueOf(numDocs));
} }

View File

@ -34,13 +34,14 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
index(id, 19, "b_t", "snake spider shark snail slug", "foo_i", "2"); index(id, 19, "b_t", "snake spider shark snail slug", "foo_i", "2");
index(id, 20, "b_t", "snake spider shark snail", "foo_i", "3"); index(id, 20, "b_t", "snake spider shark snail", "foo_i", "3");
index(id, 21, "b_t", "snake spider shark", "foo_i", "2"); index(id, 21, "b_t", "snake spider shark", "foo_i", "2");
index(id, 22, "b_t", "snake spider"); index(id, 22, "b_t", "snake spider", "c_t", "snake spider");
index(id, 23, "b_t", "snake"); index(id, 23, "b_t", "snake", "c_t", "snake");
index(id, 24, "b_t", "ant zebra"); index(id, 24, "b_t", "ant zebra", "c_t", "ant zebra");
index(id, 25, "b_t", "zebra"); index(id, 25, "b_t", "zebra", "c_t", "zebra");
commit(); commit();
handle.clear(); handle.clear();
handle.put("terms", UNORDERED);
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t"); query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t");
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.lower", "s"); query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.lower", "s");
@ -53,5 +54,6 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.list", "2, 3, 1"); query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.list", "2, 3, 1");
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2, 3, 1"); query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2, 3, 1");
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake, zebra", "terms.ttf", "true"); query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake, zebra", "terms.ttf", "true");
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.fl", "c_t", "terms.list", "snake, ant, zebra", "terms.ttf", "true");
} }
} }

View File

@ -351,4 +351,31 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
"//lst[@name='standardfilt']/lst[@name='snake']/long[@name='totalTermFreq'][.='3']"); "//lst[@name='standardfilt']/lst[@name='snake']/long[@name='totalTermFreq'][.='3']");
} }
/**
 * Checks term statistics when one /terms request names more than one field.
 *
 * The request repeats the terms.fl parameter (lowerfilt, then standardfilt), enables
 * terms.ttf, and restricts the lookup to the terms "a", "aa" and "aaa" via terms.list.
 * The assertions expect one result list per field, each holding three term entries that
 * carry both a docFreq and a totalTermFreq value.
 *
 * NOTE(review): the expected frequencies depend on documents indexed by the test class
 * setup, which is not visible in this hunk -- confirm against that fixture.
 */
@Test
public void testDocFreqAndTotalTermFreqForMultipleFields() throws Exception {
SolrQueryRequest req = req(
"indent","true",
"qt", "/terms",
"terms", "true",
// terms.fl is passed twice: once per field whose statistics are requested
"terms.fl", "lowerfilt",
"terms.fl", "standardfilt",
// terms.ttf=true asks for totalTermFreq in addition to docFreq
"terms.ttf", "true",
"terms.list", "a,aa,aaa");
assertQ(req,
// each field's list must contain exactly one child per requested term
"count(//lst[@name='lowerfilt']/*)=3",
"count(//lst[@name='standardfilt']/*)=3",
// per-term statistics expected for the lowerfilt field
"//lst[@name='lowerfilt']/lst[@name='a']/long[@name='docFreq'][.='2']",
"//lst[@name='lowerfilt']/lst[@name='a']/long[@name='totalTermFreq'][.='2']",
"//lst[@name='lowerfilt']/lst[@name='aa']/long[@name='docFreq'][.='1']",
"//lst[@name='lowerfilt']/lst[@name='aa']/long[@name='totalTermFreq'][.='1']",
"//lst[@name='lowerfilt']/lst[@name='aaa']/long[@name='docFreq'][.='1']",
"//lst[@name='lowerfilt']/lst[@name='aaa']/long[@name='totalTermFreq'][.='1']",
// per-term statistics expected for the standardfilt field
"//lst[@name='standardfilt']/lst[@name='a']/long[@name='docFreq'][.='1']",
"//lst[@name='standardfilt']/lst[@name='a']/long[@name='totalTermFreq'][.='1']",
"//lst[@name='standardfilt']/lst[@name='aa']/long[@name='docFreq'][.='1']",
"//lst[@name='standardfilt']/lst[@name='aa']/long[@name='totalTermFreq'][.='1']",
"//lst[@name='standardfilt']/lst[@name='aaa']/long[@name='docFreq'][.='1']",
"//lst[@name='standardfilt']/lst[@name='aaa']/long[@name='totalTermFreq'][.='1']");
}
} }