From 19bcffa0362b5d8d9d85dbe9c68497d4d81b8436 Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Tue, 18 Apr 2017 06:33:18 +0300 Subject: [PATCH] SOLR-10505: Add multi-field support to TermsComponent for terms stats --- solr/CHANGES.txt | 2 + .../handler/component/TermsComponent.java | 82 +++++++++---------- .../DistributedTermsComponentTest.java | 10 ++- .../handler/component/TermsComponentTest.java | 27 ++++++ 4 files changed, 75 insertions(+), 46 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 64de85477ba..6d55a3e6ee2 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -175,6 +175,8 @@ New Features Example: json.facet={x:"stddev(field1)", y:"variance(field2)"} (Rustam Hashimov, yonik) +* SOLR-10505: Add multi-field support to TermsComponent when requesting terms' statistics. (Shai Erera) + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java index b05939e511c..6c89e1daa6d 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java @@ -101,8 +101,8 @@ public class TermsComponent extends SearchComponent { boolean termStats = params.getBool(TermsParams.TERMS_STATS, false); - if(termStats) { - NamedList stats = new SimpleOrderedMap(); + if (termStats) { + NamedList stats = new SimpleOrderedMap<>(); rb.rsp.add("indexstats", stats); collectStats(rb.req.getSearcher(), stats); } @@ -335,7 +335,7 @@ public class TermsComponent extends SearchComponent { rb._termsHelper = null; } - private ShardRequest createShardQuery(SolrParams params) { + private static ShardRequest createShardQuery(SolrParams params) { ShardRequest sreq = new ShardRequest(); sreq.purpose = ShardRequest.PURPOSE_GET_TERMS; @@ -410,7 +410,7 @@ public class TermsComponent extends SearchComponent { } } - public NamedList buildResponse() { + public NamedList buildResponse() { NamedList response = new SimpleOrderedMap<>(); // determine if we are going index or count sort @@ -480,7 +480,7 @@ public class TermsComponent extends SearchComponent { } // use tags for smaller facet counts (better back compatibility) - private Number num(long val) { + private static Number num(long val) { if (val < Integer.MAX_VALUE) return (int) val; else return val; } @@ -515,53 +515,51 @@ public class TermsComponent extends SearchComponent { } } - private void fetchTerms(SolrIndexSearcher indexSearcher, - String[] fields, - String termList, - boolean includeTotalTermFreq, - NamedList result) throws IOException { - - String field = fields[0]; - FieldType fieldType = indexSearcher.getSchema().getField(field).getType(); + private static void fetchTerms(SolrIndexSearcher indexSearcher, String[] fields, String termList, + boolean includeTotalTermFreq, NamedList result) throws IOException { String[] splitTerms = termList.split(","); - - for(int i=0; i termStats = new SimpleOrderedMap<>(); - termStats.add("docFreq", (long) docFreq); - termStats.add("totalTermFreq", totalTermFreq); - termsMap.add(outTerm, termStats); + // Since splitTerms is already sorted, this array will also be sorted + Term[] terms = new Term[splitTerms.length]; + for (int i = 0; i < splitTerms.length; i++) { + terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i])); + } + + TermContext[] termContexts = new TermContext[terms.length]; + collectTermContext(topReaderContext, termContexts, terms); + + NamedList termsMap = new SimpleOrderedMap<>(); + for (int i = 0; i < terms.length; i++) { + if (termContexts[i] != null) { + String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString()); + int docFreq = termContexts[i].docFreq(); + if (!includeTotalTermFreq) { + termsMap.add(outTerm, docFreq); + } else { + long totalTermFreq = termContexts[i].totalTermFreq(); + NamedList termStats = new SimpleOrderedMap<>(); + termStats.add("docFreq", (long) docFreq); + termStats.add("totalTermFreq", totalTermFreq); + termsMap.add(outTerm, termStats); + } } } - } - result.add(field, termsMap); + result.add(field, termsMap); + } } - private void collectTermContext(IndexReaderContext topReaderContext, TermContext[] contextArray, Term[] queryTerms) - throws IOException { + private static void collectTermContext(IndexReaderContext topReaderContext, TermContext[] contextArray, + Term[] queryTerms) throws IOException { TermsEnum termsEnum = null; for (LeafReaderContext context : topReaderContext.leaves()) { final Fields fields = context.reader().fields(); @@ -589,7 +587,7 @@ public class TermsComponent extends SearchComponent { } } - private void collectStats(SolrIndexSearcher searcher, NamedList stats) { + private static void collectStats(SolrIndexSearcher searcher, NamedList stats) { int numDocs = searcher.getTopReaderContext().reader().numDocs(); stats.add("numDocs", Long.valueOf(numDocs)); } diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java index 9c90efb03e7..b3f1f306b6e 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java @@ -34,13 +34,14 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase index(id, 19, "b_t", "snake spider shark snail slug", "foo_i", "2"); index(id, 20, "b_t", "snake spider shark snail", "foo_i", "3"); index(id, 21, "b_t", "snake spider shark", "foo_i", "2"); - index(id, 22, "b_t", "snake spider"); - index(id, 23, "b_t", "snake"); - index(id, 24, "b_t", "ant zebra"); - index(id, 25, "b_t", "zebra"); + index(id, 22, "b_t", "snake spider", "c_t", "snake spider"); + index(id, 23, "b_t", "snake", "c_t", "snake"); + index(id, 24, "b_t", "ant zebra", "c_t", "ant zebra"); + index(id, 25, "b_t", "zebra", "c_t", "zebra"); commit(); handle.clear(); + handle.put("terms", UNORDERED); query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t"); query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.lower", "s"); @@ -53,5 +54,6 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.list", "2, 3, 1"); query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2, 3, 1"); query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake, zebra", "terms.ttf", "true"); + query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.fl", "c_t", "terms.list", "snake, ant, zebra", "terms.ttf", "true"); } } diff --git a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java index 7fb5e120260..29e54e04671 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java @@ -351,4 +351,31 @@ public class TermsComponentTest extends SolrTestCaseJ4 { "//lst[@name='standardfilt']/lst[@name='snake']/long[@name='totalTermFreq'][.='3']"); } + @Test + public void testDocFreqAndTotalTermFreqForMultipleFields() throws Exception { + SolrQueryRequest req = req( + "indent","true", + "qt", "/terms", + "terms", "true", + "terms.fl", "lowerfilt", + "terms.fl", "standardfilt", + "terms.ttf", "true", + "terms.list", "a,aa,aaa"); + assertQ(req, + "count(//lst[@name='lowerfilt']/*)=3", + "count(//lst[@name='standardfilt']/*)=3", + "//lst[@name='lowerfilt']/lst[@name='a']/long[@name='docFreq'][.='2']", + "//lst[@name='lowerfilt']/lst[@name='a']/long[@name='totalTermFreq'][.='2']", + "//lst[@name='lowerfilt']/lst[@name='aa']/long[@name='docFreq'][.='1']", + "//lst[@name='lowerfilt']/lst[@name='aa']/long[@name='totalTermFreq'][.='1']", + "//lst[@name='lowerfilt']/lst[@name='aaa']/long[@name='docFreq'][.='1']", + "//lst[@name='lowerfilt']/lst[@name='aaa']/long[@name='totalTermFreq'][.='1']", + "//lst[@name='standardfilt']/lst[@name='a']/long[@name='docFreq'][.='1']", + "//lst[@name='standardfilt']/lst[@name='a']/long[@name='totalTermFreq'][.='1']", + "//lst[@name='standardfilt']/lst[@name='aa']/long[@name='docFreq'][.='1']", + "//lst[@name='standardfilt']/lst[@name='aa']/long[@name='totalTermFreq'][.='1']", + "//lst[@name='standardfilt']/lst[@name='aaa']/long[@name='docFreq'][.='1']", + "//lst[@name='standardfilt']/lst[@name='aaa']/long[@name='totalTermFreq'][.='1']"); + } + }