diff --git a/solr/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/src/java/org/apache/solr/handler/component/QueryComponent.java index d9b43f5ebef..500b303065f 100644 --- a/solr/src/java/org/apache/solr/handler/component/QueryComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/QueryComponent.java @@ -300,8 +300,8 @@ public class QueryComponent extends SearchComponent boolean doGroup = params.getBool(GroupParams.GROUP, false); if (doGroup) { try { - cmd.groupCommands = new ArrayList(); - + Grouping grouping = new Grouping(searcher, result, cmd); + String[] fields = params.getParams(GroupParams.GROUP_FIELD); String[] funcs = params.getParams(GroupParams.GROUP_FUNC); String[] queries = params.getParams(GroupParams.GROUP_QUERY); @@ -330,7 +330,7 @@ public class QueryComponent extends SearchComponent for (String groupByStr : funcs) { QParser parser = QParser.getParser(groupByStr, "func", rb.req); Query q = parser.getQuery(); - Grouping.CommandFunc gc = new Grouping.CommandFunc(); + Grouping.CommandFunc gc = grouping.new CommandFunc(); gc.groupSort = groupSort; if (q instanceof FunctionQuery) { @@ -343,8 +343,9 @@ public class QueryComponent extends SearchComponent gc.docsPerGroup = docsPerGroupDefault; gc.groupOffset = groupOffsetDefault; gc.offset = cmd.getOffset(); + gc.sort = cmd.getSort(); - cmd.groupCommands.add(gc); + grouping.add(gc); } } @@ -352,7 +353,7 @@ public class QueryComponent extends SearchComponent for (String groupByStr : queries) { QParser parser = QParser.getParser(groupByStr, null, rb.req); Query gq = parser.getQuery(); - Grouping.CommandQuery gc = new Grouping.CommandQuery(); + Grouping.CommandQuery gc = grouping.new CommandQuery(); gc.query = gq; gc.groupSort = groupSort; gc.key = groupByStr; @@ -360,26 +361,23 @@ public class QueryComponent extends SearchComponent gc.docsPerGroup = docsPerGroupDefault; gc.groupOffset = groupOffsetDefault; - cmd.groupCommands.add(gc); + grouping.add(gc); } } - if (cmd.groupCommands.size() == 0) - cmd.groupCommands = null; - - if (cmd.groupCommands != null) { - if (rb.doHighlights || rb.isDebug()) { - // we need a single list of the returned docs - cmd.setFlags(SolrIndexSearcher.GET_DOCLIST); - } - - searcher.search(result,cmd); - rb.setResult( result ); - rsp.add("grouped", result.groupedResults); - // TODO: get "hits" a different way to log - return; + if (rb.doHighlights || rb.isDebug()) { + // we need a single list of the returned docs + cmd.setFlags(SolrIndexSearcher.GET_DOCLIST); } + + // searcher.search(result,cmd); + grouping.execute(); + rb.setResult( result ); + rsp.add("grouped", result.groupedResults); + // TODO: get "hits" a different way to log + return; + } catch (ParseException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } diff --git a/solr/src/java/org/apache/solr/search/Grouping.java b/solr/src/java/org/apache/solr/search/Grouping.java index 6e4d8ed3d90..a3bbabf7ac1 100755 --- a/solr/src/java/org/apache/solr/search/Grouping.java +++ b/solr/src/java/org/apache/solr/search/Grouping.java @@ -19,6 +19,8 @@ package org.apache.solr.search; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.*; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.search.function.DocValues; import org.apache.solr.search.function.ValueSource; @@ -27,27 +29,286 @@ import java.util.*; public class Grouping { - public static class Command { - public String key; // the name to use for this group in the response - public Sort groupSort; // the sort of the documents *within* a single group. + + public abstract class Command { + public String key; // the name to use for this group in the response + public Sort groupSort; // the sort of the documents *within* a single group. + public Sort sort; // the sort between groups public int docsPerGroup; // how many docs in each group - from "group.limit" param, default=1 - public int groupOffset; // the offset within each group (for paging within each group) - public int numGroups; // how many groups - defaults to the "rows" parameter - public int offset; // offset into the list of groups + public int groupOffset; // the offset within each group (for paging within each group) + public int numGroups; // how many groups - defaults to the "rows" parameter + public int offset; // offset into the list of groups + + + abstract void prepare() throws IOException; + abstract Collector createCollector() throws IOException; + Collector createNextCollector() throws IOException { + return null; + } + abstract void finish() throws IOException; + + abstract int getMatches(); + + NamedList commonResponse() { + NamedList groupResult = new SimpleOrderedMap(); + grouped.add(key, groupResult); // grouped={ key={ + + int this_matches = getMatches(); + groupResult.add("matches", this_matches); + maxMatches = Math.max(maxMatches, this_matches); + return groupResult; + } + + DocList getDocList(TopDocsCollector collector) { + int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc); + + // TODO: implement a DocList impl that doesn't need to start at offset=0 + TopDocs topDocs = collector.topDocs(0, docsToCollect); + + int ids[] = new int[topDocs.scoreDocs.length]; + float[] scores = needScores ? new float[topDocs.scoreDocs.length] : null; + for (int i=0; i maxDoc) docsToCollect = maxDoc; + + collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset); + return collector2; + } + + @Override + void finish() throws IOException { + NamedList groupResult = commonResponse(); + + if (collector.orderedGroups == null) collector.buildSet(); + + List groupList = new ArrayList(); + groupResult.add("groups", groupList); // grouped={ key={ groups=[ + + int skipCount = offset; + for (SearchGroup group : collector.orderedGroups) { + if (skipCount > 0) { + skipCount--; + continue; + } + NamedList nl = new SimpleOrderedMap(); + groupList.add(nl); // grouped={ key={ groups=[ { + + nl.add("groupValue", group.groupValue.toObject()); + + SearchGroupDocs groupDocs = collector2.groupMap.get(group.groupValue); + addDocList(nl, groupDocs.collector); + } + } + + @Override + int getMatches() { + return collector.getMatches(); + } } + + + + static Sort byScoreDesc = new Sort(); + + static boolean compareSorts(Sort sort1, Sort sort2) { + return sort1 == sort2 || normalizeSort(sort1).equals(normalizeSort(sort2)); + } + + /** returns a sort by score desc if null */ + static Sort normalizeSort(Sort sort) { + return sort==null ? byScoreDesc : sort; + } + + static int getMax(int offset, int len, int max) { + int v = len<0 ? max : offset + len; + if (v < 0 || v > max) v = max; + return v; + } + + static TopDocsCollector newCollector(Sort sort, int numHits, boolean fillFields, boolean needScores) throws IOException { + if (sort==null || sort==byScoreDesc) { + return TopScoreDocCollector.create(numHits, true); + } else { + return TopFieldCollector.create(sort, numHits, false, needScores, needScores, true); + } + } + + + final SolrIndexSearcher searcher; + final SolrIndexSearcher.QueryResult qr; + final SolrIndexSearcher.QueryCommand cmd; + final List commands = new ArrayList(); + + public Grouping(SolrIndexSearcher searcher, SolrIndexSearcher.QueryResult qr, SolrIndexSearcher.QueryCommand cmd) { + this.searcher = searcher; + this.qr = qr; + this.cmd = cmd; + } + + public void add(Grouping.Command groupingCommand) { + commands.add(groupingCommand); + } + + int maxDoc; + boolean needScores; + boolean getDocSet; + boolean getDocList; // doclist needed for debugging or highlighting + Query query; + DocSet filter; + Filter luceneFilter; + NamedList grouped = new SimpleOrderedMap(); + Set idSet = new LinkedHashSet(); // used for tracking unique docs when we need a doclist + int maxMatches; // max number of matches from any grouping command + float maxScore = Float.NEGATIVE_INFINITY; // max score seen in any doclist + + public void execute() throws IOException { + DocListAndSet out = new DocListAndSet(); + qr.setDocListAndSet(out); + + filter = cmd.getFilter()!=null ? cmd.getFilter() : searcher.getDocSet(cmd.getFilterList()); + + maxDoc = searcher.maxDoc(); + + needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0; + getDocSet = (cmd.getFlags() & SolrIndexSearcher.GET_DOCSET) != 0; + getDocList = (cmd.getFlags() & SolrIndexSearcher.GET_DOCLIST) != 0; // doclist needed for debugging or highlighting + query = QueryUtils.makeQueryable(cmd.getQuery()); + + for (Command cmd : commands) { + cmd.prepare(); + } + + List collectors = new ArrayList(commands.size()); + for (Command cmd : commands) { + Collector collector = cmd.createCollector(); + if (collector != null) + collectors.add(collector); + } + + Collector allCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()])); + DocSetCollector setCollector = null; + if (getDocSet) { + setCollector = new DocSetDelegateCollector(maxDoc>>6, maxDoc, allCollectors); + allCollectors = setCollector; + } + + searcher.search(query, luceneFilter, allCollectors); + + if (getDocSet) { + qr.setDocSet(setCollector.getDocSet()); + } + + collectors.clear(); + for (Command cmd : commands) { + Collector collector = cmd.createNextCollector(); + if (collector != null) + collectors.add(collector); + } + + if (collectors.size() > 0) { + searcher.search(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]))); + } + + for (Command cmd : commands) { + cmd.finish(); + } + + qr.groupedResults = grouped; + + if (getDocList) { + int sz = idSet.size(); + int[] ids = new int[sz]; + int idx = 0; + for (int val : idSet) { + ids[idx++] = val; + } + qr.setDocList(new DocSlice(0, sz, ids, null, maxMatches, maxScore)); + } + } + } @@ -82,11 +343,11 @@ abstract class GroupCollector extends Collector { class FilterCollector extends GroupCollector { private final DocSet filter; - private final TopFieldCollector collector; + private final Collector collector; private int docBase; private int matches; - public FilterCollector(DocSet filter, TopFieldCollector collector) throws IOException { + public FilterCollector(DocSet filter, Collector collector) throws IOException { this.filter = filter; this.collector = collector; } @@ -119,7 +380,7 @@ class FilterCollector extends GroupCollector { return matches; } - TopFieldCollector getTopFieldCollector() { + Collector getCollector() { return collector; } } @@ -527,7 +788,10 @@ class Phase2GroupCollector extends Collector { } SearchGroupDocs groupDocs = new SearchGroupDocs(); groupDocs.groupValue = group.groupValue; - groupDocs.collector = TopFieldCollector.create(sort, docsPerGroup, getSortFields, getScores, getScores, true); + if (sort==null) + groupDocs.collector = TopScoreDocCollector.create(docsPerGroup, true); + else + groupDocs.collector = TopFieldCollector.create(sort, docsPerGroup, getSortFields, getScores, getScores, true); groupMap.put(groupDocs.groupValue, groupDocs); } @@ -571,6 +835,6 @@ class Phase2GroupCollector extends Collector { // disad: blows up the size of SearchGroup if we need many of them, and couples implementations class SearchGroupDocs { public MutableValue groupValue; - TopFieldCollector collector; + TopDocsCollector collector; } diff --git a/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java index 77488b0c3c1..88f5e402c16 100644 --- a/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -983,238 +983,19 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { return qr.getDocList(); } - private static final int NO_CHECK_QCACHE = 0x80000000; - private static final int GET_DOCSET = 0x40000000; - private static final int NO_CHECK_FILTERCACHE = 0x20000000; + static final int NO_CHECK_QCACHE = 0x80000000; + static final int GET_DOCSET = 0x40000000; + static final int NO_CHECK_FILTERCACHE = 0x20000000; public static final int GET_DOCLIST = 0x02; // get the documents actually returned in a response public static final int GET_SCORES = 0x01; - private void groupBy(QueryResult qr, QueryCommand cmd) throws IOException { - DocListAndSet out = new DocListAndSet(); - qr.setDocListAndSet(out); - - DocSet filter = cmd.getFilter()!=null ? cmd.getFilter() : getDocSet(cmd.getFilterList()); - - int maxDoc = maxDoc(); - - boolean needScores = (cmd.getFlags() & GET_SCORES) != 0; - boolean getDocSet = (cmd.getFlags() & GET_DOCSET) != 0; - boolean getDocList = (cmd.getFlags() & GET_DOCLIST) != 0; // doclist needed for debugging or highlighting - Query query = QueryUtils.makeQueryable(cmd.getQuery()); - - final Filter luceneFilter = filter==null ? null : filter.getTopFilter(); - - Sort sort = cmd.getSort(); - if (sort == null) sort = new Sort(); - - List collectors = new ArrayList(cmd.groupCommands.size()); - for (Grouping.Command groupCommand : cmd.groupCommands) { - // TODO: perhaps use some methods rather than instanceof - if (groupCommand instanceof Grouping.CommandFunc) { - Grouping.CommandFunc gc = (Grouping.CommandFunc)groupCommand; - Map context = ValueSource.newContext(); - gc.groupBy.createWeight(context, this); - TopGroupCollector collector; - - int groupsToCollect = gc.numGroups<0 ? maxDoc : gc.offset + gc.numGroups; - if (groupsToCollect < 0 || groupsToCollect > maxDoc) groupsToCollect = maxDoc; - - if (gc.groupSort != null && gc.groupSort != sort) { - collector = new TopGroupSortCollector(gc.groupBy, context, sort, gc.groupSort, groupsToCollect); - } else { - collector = new TopGroupCollector(gc.groupBy, context, sort, groupsToCollect); - } - collectors.add(collector); - - // for next phase - gc.context = context; - gc.collector = collector; - } - - if (groupCommand instanceof Grouping.CommandQuery) { - int docsToCollect = groupCommand.docsPerGroup<0 ? maxDoc : groupCommand.groupOffset + groupCommand.docsPerGroup; - if (docsToCollect < 0 || docsToCollect > maxDoc) docsToCollect = maxDoc; - - DocSet groupFilt = getDocSet(((Grouping.CommandQuery)groupCommand).query); - TopFieldCollector collector = TopFieldCollector.create(groupCommand.groupSort==null ? sort : groupCommand.groupSort, docsToCollect, false, needScores, needScores, true); - collectors.add(new FilterCollector(groupFilt, collector)); - } - } - - Collector allCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()])); - DocSetCollector setCollector = null; - if (getDocSet) { - // TODO: can callCollectors be zero length? - setCollector = new DocSetDelegateCollector(maxDoc()>>6, maxDoc(), allCollectors); - allCollectors = setCollector; - } - - search(query, luceneFilter, allCollectors); - - if (getDocSet) { - qr.docListAndSet.docSet = setCollector.getDocSet(); - } - - // TODO: make this a generic collector list - int numPhase2 = 0; - List phase2Collectors = new ArrayList(cmd.groupCommands.size()); - for (Grouping.Command groupCommand : cmd.groupCommands) { - if (groupCommand instanceof Grouping.CommandFunc) { - Grouping.CommandFunc gc = (Grouping.CommandFunc)groupCommand; - Sort collectorSort = gc.groupSort == null ? sort : gc.groupSort; - - int docsToCollect = groupCommand.docsPerGroup<0 ? maxDoc : groupCommand.groupOffset + groupCommand.docsPerGroup; - if (docsToCollect < 0 || docsToCollect > maxDoc) docsToCollect = maxDoc; - - Phase2GroupCollector collector = new Phase2GroupCollector((TopGroupCollector)gc.collector, gc.groupBy, gc.context, collectorSort, docsToCollect, needScores, groupCommand.offset); - phase2Collectors.add(collector); - numPhase2++; - } else if (groupCommand instanceof Grouping.CommandQuery) { - phase2Collectors.add(null); - } else { - phase2Collectors.add(null); - } - } - - // TODO: optionally cache docs and feed them back through rather than re-searching - if (numPhase2 > 0) - search(query, luceneFilter, MultiCollector.wrap(phase2Collectors.toArray(new Collector[phase2Collectors.size()]))); - - Set idSet = new LinkedHashSet(); // used for tracking unique docs when we need a doclist - int maxMatches = 0; - float maxScore = Float.NEGATIVE_INFINITY; - - NamedList grouped = new SimpleOrderedMap(); - for (int cmdnum=0; cmdnum maxDoc) docsToCollect = maxDoc; - - TopDocs topDocs = ((FilterCollector)gcollector).getTopFieldCollector().topDocs(0, docsToCollect); - - // TODO: refactor - - //topDocs.totalHits - int ids[] = new int[topDocs.scoreDocs.length]; - float[] scores = needScores ? new float[topDocs.scoreDocs.length] : null; - for (int i=0; i 0) { - skipCount--; - continue; - } - NamedList nl = new SimpleOrderedMap(); - groupList.add(nl); // grouped={ key={ groups=[ { - - nl.add("groupValue", group.groupValue.toObject()); - - SearchGroupDocs groupDocs = collector2.groupMap.get(group.groupValue); - // nl.add("matches", groupDocs.matches); // redundant with doclist.numFound from the doc list - - int docsToCollect = groupCommand.docsPerGroup<0 ? maxDoc : groupCommand.groupOffset + groupCommand.docsPerGroup; - if (docsToCollect < 0 || docsToCollect > maxDoc) docsToCollect = maxDoc; - - TopDocs topDocs = groupDocs.collector.topDocs(0, docsToCollect); - //topDocs.totalHits - int ids[] = new int[topDocs.scoreDocs.length]; - float[] scores = needScores ? new float[topDocs.scoreDocs.length] : null; - for (int i=0; i groupCommands; + // public List groupCommands; public Query getQuery() { return query; } public QueryCommand setQuery(Query query) { diff --git a/solr/src/test/org/apache/solr/TestGroupingSearch.java b/solr/src/test/org/apache/solr/TestGroupingSearch.java index f11c14fa0d2..be7ca9d9269 100644 --- a/solr/src/test/org/apache/solr/TestGroupingSearch.java +++ b/solr/src/test/org/apache/solr/TestGroupingSearch.java @@ -105,27 +105,6 @@ public class TestGroupingSearch extends SolrTestCaseJ4 { ); } - @Test - public void testGroupingGroupSortingName() { - assertU(add(doc("id", "1","name", "author1", "title", "a book title"))); - assertU(add(doc("id", "2","name", "author1", "title", "the title"))); - assertU(add(doc("id", "3","name", "author2", "title", "book title"))); - assertU(add(doc("id", "4","name", "author2", "title", "the title"))); - assertU(commit()); - - assertQ(req("q","title:title", "group", "true", "group.field","name", "group.sort", "title asc") - ,"*[count(//arr[@name='groups']/lst) = 2]" - ,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']" - // ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']" - ,"//arr[@name='groups']/lst[1]/result[@numFound='2']" - ,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='3']" - - ,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']" - // ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']" - ,"//arr[@name='groups']/lst[2]/result[@numFound='2']" - ,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='1']" - ); - } @Test public void testGroupingGroupSortingWeight() {