From 3944f43cc4de1cc0e282ebc40fe6a0840fe89e6a Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Sun, 19 Sep 2010 17:59:33 +0000 Subject: [PATCH] SOLR-2123: group by query git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@998707 13f79535-47bb-0310-9956-ffa450edef68 --- .../handler/component/QueryComponent.java | 17 +++++- .../java/org/apache/solr/search/Grouping.java | 55 ++++++++++++++++- .../apache/solr/search/SolrIndexSearcher.java | 60 ++++++++++++++++--- .../test/org/apache/solr/JSONTestUtil.java | 6 +- .../org/apache/solr/TestGroupingSearch.java | 51 +++++++++++----- 5 files changed, 160 insertions(+), 29 deletions(-) diff --git a/solr/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/src/java/org/apache/solr/handler/component/QueryComponent.java index 18d96291ef3..36bc89e4862 100644 --- a/solr/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -224,7 +224,22 @@ public class QueryComponent extends SearchComponent gc.groupBy = new QueryValueSource(q, 0.0f); } gc.key = groupByStr; - gc.groupLimit = limitDefault; + gc.numGroups = limitDefault; + gc.docsPerGroup = docsPerGroupDefault; + + cmd.groupCommands.add(gc); + } + } + + if (queries != null) { + for (String groupByStr : queries) { + QParser parser = QParser.getParser(groupByStr, null, rb.req); + Query gq = parser.getQuery(); + Grouping.CommandQuery gc = new Grouping.CommandQuery(); + gc.query = gq; + gc.groupSort = groupSort; + gc.key = groupByStr; + gc.numGroups = limitDefault; gc.docsPerGroup = docsPerGroupDefault; cmd.groupCommands.add(gc); diff --git a/solr/src/java/org/apache/solr/search/Grouping.java b/solr/src/java/org/apache/solr/search/Grouping.java index 8e91b45d6ee..3e1abe05fbe 100755 --- a/solr/src/java/org/apache/solr/search/Grouping.java +++ b/solr/src/java/org/apache/solr/search/Grouping.java @@ -30,8 +30,8 @@ public class Grouping { public static class Command { public String key; // the name to use for this group in the response public Sort groupSort; // the sort of the documents *within* a single group. - public int groupLimit; // how many groups - defaults to the "rows" parameter public int docsPerGroup; // how many docs in each group - from "group.limit" param, default=1 + public int numGroups; // how many groups - defaults to the "rows" parameter } public static class CommandQuery extends Command { @@ -73,10 +73,60 @@ class SearchGroup { ***/ } +abstract class GroupCollector extends Collector { + /** get the number of matches before grouping or limiting have been applied */ + public abstract int getMatches(); +} + +class FilterCollector extends GroupCollector { + private final DocSet filter; + private final TopFieldCollector collector; + private int docBase; + private int matches; + + public FilterCollector(DocSet filter, TopFieldCollector collector) throws IOException { + this.filter = filter; + this.collector = collector; + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + collector.setScorer(scorer); + } + + @Override + public void collect(int doc) throws IOException { + matches++; + if (filter.exists(doc + docBase)) + collector.collect(doc); + } + + @Override + public void setNextReader(IndexReader reader, int docBase) throws IOException { + this.docBase = docBase; + collector.setNextReader(reader, docBase); + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return collector.acceptsDocsOutOfOrder(); + } + + @Override + public int getMatches() { + return matches; + } + + TopFieldCollector getTopFieldCollector() { + return collector; + } +} + + /** Finds the top set of groups, grouped by groupByVS when sort == group.sort */ -class TopGroupCollector extends Collector { +class TopGroupCollector extends GroupCollector { final int nGroups; final HashMap groupMap; TreeSet orderedGroups; @@ -261,6 +311,7 @@ class TopGroupCollector extends Collector { return false; } + @Override public int getMatches() { return matches; } diff --git a/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java index 16ab9277cd2..c14de7def7a 100644 --- a/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -921,8 +921,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { Sort sort = cmd.getSort(); if (sort == null) sort = new Sort(); - // TODO: make this a generic collector list - List collectors = new ArrayList(cmd.groupCommands.size()); + List collectors = new ArrayList(cmd.groupCommands.size()); for (Grouping.Command groupCommand : cmd.groupCommands) { // TODO: perhaps use some methods rather than instanceof if (groupCommand instanceof Grouping.CommandFunc) { @@ -941,6 +940,12 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { gc.context = context; gc.collector = collector; } + + if (groupCommand instanceof Grouping.CommandQuery) { + DocSet groupFilt = getDocSet(((Grouping.CommandQuery)groupCommand).query); + TopFieldCollector collector = TopFieldCollector.create(groupCommand.groupSort==null ? sort : groupCommand.groupSort, groupCommand.docsPerGroup, false, needScores, needScores, true); + collectors.add(new FilterCollector(groupFilt, collector)); + } } Collector allCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()])); @@ -958,6 +963,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { } // TODO: make this a generic collector list + int numPhase2 = 0; List phase2Collectors = new ArrayList(cmd.groupCommands.size()); for (Grouping.Command groupCommand : cmd.groupCommands) { if (groupCommand instanceof Grouping.CommandFunc) { @@ -965,11 +971,17 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { Sort collectorSort = gc.groupSort == null ? sort : gc.groupSort; Phase2GroupCollector collector = new Phase2GroupCollector((TopGroupCollector)gc.collector, gc.groupBy, gc.context, collectorSort, gc.docsPerGroup, needScores); phase2Collectors.add(collector); + numPhase2++; + } else if (groupCommand instanceof Grouping.CommandQuery) { + phase2Collectors.add(null); + } else { + phase2Collectors.add(null); } } // TODO: optionally cache docs and feed them back through rather than re-searching - search(query, luceneFilter, MultiCollector.wrap(phase2Collectors.toArray(new Collector[phase2Collectors.size()]))); + if (numPhase2 > 0) + search(query, luceneFilter, MultiCollector.wrap(phase2Collectors.toArray(new Collector[phase2Collectors.size()]))); Set idSet = new LinkedHashSet(); // used for tracking unique docs when we need a doclist int maxMatches = 0; @@ -978,19 +990,49 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { NamedList grouped = new SimpleOrderedMap(); for (int cmdnum=0; cmdnum=0 ? pathAndExpected.substring(0,pos) : null; - String expected = pos>=0 ? pathAndExpected.substring(pos+1) : pathAndExpected; + String expected = pos>=0 ? pathAndExpected.substring(pos+2) : pathAndExpected; return match(path, input, expected); } diff --git a/solr/src/test/org/apache/solr/TestGroupingSearch.java b/solr/src/test/org/apache/solr/TestGroupingSearch.java index 60f72881647..4da45e1e080 100644 --- a/solr/src/test/org/apache/solr/TestGroupingSearch.java +++ b/solr/src/test/org/apache/solr/TestGroupingSearch.java @@ -168,10 +168,10 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { ); assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id") - ,"/responseHeader/status:0" // exact match - ,"/responseHeader:{'_SKIP_':'QTime', 'status':0}" // partial match by skipping some elements - ,"/responseHeader:{'_MATCH_':'status', 'status':0}" // partial match by only including some elements - ,"/grouped:{'foo_i':{'matches':10,'groups':[\n" + + ,"/responseHeader/status==0" // exact match + ,"/responseHeader=={'_SKIP_':'QTime', 'status':0}" // partial match by skipping some elements + ,"/responseHeader=={'_MATCH_':'status', 'status':0}" // partial match by only including some elements + ,"/grouped=={'"+f+"':{'matches':10,'groups':[\n" + "{'groupValue':1,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," + "{'groupValue':3,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}," + "{'groupValue':2,'doclist':{'numFound':3,'start':0,'docs':[{'id':'4'}]}}," + @@ -182,7 +182,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { // test limiting the number of groups returned assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2") - ,"/grouped:{'foo_i':{'matches':10,'groups':[" + + ,"/grouped=={'"+f+"':{'matches':10,'groups':[" + "{'groupValue':1,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," + "{'groupValue':3,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}" + "]}}" @@ -190,7 +190,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { // test increasing the docs per group returned assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "group.limit","3") - ,"/grouped:{'foo_i':{'matches':10,'groups':[" + + ,"/grouped=={'"+f+"':{'matches':10,'groups':[" + "{'groupValue':1,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'},{'id':'10'},{'id':'5'}]}}," + "{'groupValue':3,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'6'}]}}" + "]}}" @@ -198,7 +198,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { // test adding in scores assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id,score", "rows","2", "group.limit","2", "indent","off") - ,"/grouped/foo_i/groups:" + + ,"/grouped/"+f+"/groups==" + "[" + "{'groupValue':1,'doclist':{'numFound':3,'start':0,'maxScore':10.0,'docs':[{'id':'8','score':10.0},{'id':'10','score':3.0}]}}," + "{'groupValue':3,'doclist':{'numFound':2,'start':0,'maxScore':7.0,'docs':[{'id':'3','score':7.0},{'id':'6','score':2.0}]}}" + @@ -209,7 +209,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { // test function (functions are currently all float - this may change) String func = "add("+f+","+f+")"; assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.func", func , "fl","id", "rows","2") - ,"/grouped:{'"+func+"':{'matches':10,'groups':[" + + ,"/grouped=={'"+func+"':{'matches':10,'groups':[" + "{'groupValue':2.0,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," + "{'groupValue':6.0,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}" + "]}}" @@ -218,26 +218,47 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { // test that faceting works with grouping assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id" ,"facet","true", "facet.field",f) - ,"/grouped/foo_i/matches:10:" - ,"/facet_counts/facet_fields/"+f+":['1',3, '2',3, '3',2, '4',1, '5',1]" + ,"/grouped/"+f+"/matches==10" + ,"/facet_counts/facet_fields/"+f+"==['1',3, '2',3, '3',2, '4',1, '5',1]" ); purgeFieldCache(FieldCache.DEFAULT); // avoid FC insanity // test that grouping works with highlighting assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id" ,"hl","true", "hl.fl",f) - ,"/grouped/foo_i/matches:10:" - ,"/highlighting:{'_ORDERED_':'', '8':{},'3':{},'4':{},'1':{},'2':{}}" + ,"/grouped/"+f+"/matches==10" + ,"/highlighting=={'_ORDERED_':'', '8':{},'3':{},'4':{},'1':{},'2':{}}" ); // test that grouping works with debugging assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id" ,"debugQuery","true") - ,"/grouped/foo_i/matches:10:" - ,"/debug/explain/8:" - ,"/debug/explain/2:" + ,"/grouped/"+f+"/matches==10" + ,"/debug/explain/8==" + ,"/debug/explain/2==" ); + + ///////////////////////// group.query + assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "group.limit","3") + ,"/grouped=={'id:[2 TO 5]':{'matches':10," + + "'doclist':{'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'},{'id':'2'}]}}}" + ); + + // multiple at once + assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", + "group.query","id:[2 TO 5]", + "group.query","id:[5 TO 5]", + "group.field",f, + "rows","1", + "fl","id", "group.limit","2") + ,"/grouped/id:[2 TO 5]=={'matches':10,'doclist':{'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'}]}}" + ,"/grouped/id:[5 TO 5]=={'matches':10,'doclist':{'numFound':1,'start':0,'docs':[{'id':'5'}]}}" + ,"/grouped/"+f+"=={'matches':10,'groups':[{'groupValue':1,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'},{'id':'10'}]}}]}" + ); + + }; + }