SOLR-2123: group by query

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@998707 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2010-09-19 17:59:33 +00:00
parent c84bd2f1ec
commit 3944f43cc4
5 changed files with 160 additions and 29 deletions

View File

@ -224,7 +224,22 @@ public class QueryComponent extends SearchComponent
gc.groupBy = new QueryValueSource(q, 0.0f);
}
gc.key = groupByStr;
gc.groupLimit = limitDefault;
gc.numGroups = limitDefault;
gc.docsPerGroup = docsPerGroupDefault;
cmd.groupCommands.add(gc);
}
}
if (queries != null) {
for (String groupByStr : queries) {
QParser parser = QParser.getParser(groupByStr, null, rb.req);
Query gq = parser.getQuery();
Grouping.CommandQuery gc = new Grouping.CommandQuery();
gc.query = gq;
gc.groupSort = groupSort;
gc.key = groupByStr;
gc.numGroups = limitDefault;
gc.docsPerGroup = docsPerGroupDefault;
cmd.groupCommands.add(gc);

View File

@ -30,8 +30,8 @@ public class Grouping {
public static class Command {
public String key; // the name to use for this group in the response
public Sort groupSort; // the sort of the documents *within* a single group.
public int groupLimit; // how many groups - defaults to the "rows" parameter
public int docsPerGroup; // how many docs in each group - from "group.limit" param, default=1
public int numGroups; // how many groups - defaults to the "rows" parameter
}
public static class CommandQuery extends Command {
@ -73,10 +73,60 @@ class SearchGroup {
***/
}
abstract class GroupCollector extends Collector {
/** get the number of matches before grouping or limiting have been applied */
public abstract int getMatches();
}
class FilterCollector extends GroupCollector {
private final DocSet filter;
private final TopFieldCollector collector;
private int docBase;
private int matches;
public FilterCollector(DocSet filter, TopFieldCollector collector) throws IOException {
this.filter = filter;
this.collector = collector;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
collector.setScorer(scorer);
}
@Override
public void collect(int doc) throws IOException {
matches++;
if (filter.exists(doc + docBase))
collector.collect(doc);
}
@Override
public void setNextReader(IndexReader reader, int docBase) throws IOException {
this.docBase = docBase;
collector.setNextReader(reader, docBase);
}
@Override
public boolean acceptsDocsOutOfOrder() {
return collector.acceptsDocsOutOfOrder();
}
@Override
public int getMatches() {
return matches;
}
TopFieldCollector getTopFieldCollector() {
return collector;
}
}
/** Finds the top set of groups, grouped by groupByVS when sort == group.sort */
class TopGroupCollector extends Collector {
class TopGroupCollector extends GroupCollector {
final int nGroups;
final HashMap<MutableValue, SearchGroup> groupMap;
TreeSet<SearchGroup> orderedGroups;
@ -261,6 +311,7 @@ class TopGroupCollector extends Collector {
return false;
}
@Override
public int getMatches() {
return matches;
}

View File

@ -921,8 +921,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
Sort sort = cmd.getSort();
if (sort == null) sort = new Sort();
// TODO: make this a generic collector list
List<TopGroupCollector> collectors = new ArrayList<TopGroupCollector>(cmd.groupCommands.size());
List<GroupCollector> collectors = new ArrayList<GroupCollector>(cmd.groupCommands.size());
for (Grouping.Command groupCommand : cmd.groupCommands) {
// TODO: perhaps use some methods rather than instanceof
if (groupCommand instanceof Grouping.CommandFunc) {
@ -941,6 +940,12 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
gc.context = context;
gc.collector = collector;
}
if (groupCommand instanceof Grouping.CommandQuery) {
DocSet groupFilt = getDocSet(((Grouping.CommandQuery)groupCommand).query);
TopFieldCollector collector = TopFieldCollector.create(groupCommand.groupSort==null ? sort : groupCommand.groupSort, groupCommand.docsPerGroup, false, needScores, needScores, true);
collectors.add(new FilterCollector(groupFilt, collector));
}
}
Collector allCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
@ -958,6 +963,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
}
// TODO: make this a generic collector list
int numPhase2 = 0;
List<Phase2GroupCollector> phase2Collectors = new ArrayList<Phase2GroupCollector>(cmd.groupCommands.size());
for (Grouping.Command groupCommand : cmd.groupCommands) {
if (groupCommand instanceof Grouping.CommandFunc) {
@ -965,11 +971,17 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
Sort collectorSort = gc.groupSort == null ? sort : gc.groupSort;
Phase2GroupCollector collector = new Phase2GroupCollector((TopGroupCollector)gc.collector, gc.groupBy, gc.context, collectorSort, gc.docsPerGroup, needScores);
phase2Collectors.add(collector);
numPhase2++;
} else if (groupCommand instanceof Grouping.CommandQuery) {
phase2Collectors.add(null);
} else {
phase2Collectors.add(null);
}
}
// TODO: optionally cache docs and feed them back through rather than re-searching
search(query, luceneFilter, MultiCollector.wrap(phase2Collectors.toArray(new Collector[phase2Collectors.size()])));
if (numPhase2 > 0)
search(query, luceneFilter, MultiCollector.wrap(phase2Collectors.toArray(new Collector[phase2Collectors.size()])));
Set<Integer> idSet = new LinkedHashSet<Integer>(); // used for tracking unique docs when we need a doclist
int maxMatches = 0;
@ -978,19 +990,49 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
NamedList grouped = new SimpleOrderedMap();
for (int cmdnum=0; cmdnum<cmd.groupCommands.size(); cmdnum++) {
Grouping.Command groupCommand = cmd.groupCommands.get(cmdnum);
Grouping.CommandFunc groupCommandFunc = (Grouping.CommandFunc)groupCommand;
TopGroupCollector collector = collectors.get(cmdnum);
Phase2GroupCollector collector2 = phase2Collectors.get(cmdnum);
if (collector.orderedGroups == null) collector.buildSet();
GroupCollector gcollector = (GroupCollector)collectors.get(cmdnum);
NamedList groupResult = new SimpleOrderedMap();
grouped.add(groupCommand.key, groupResult); // grouped={ key={
int this_matches = collector.getMatches();
int this_matches = gcollector.getMatches();
groupResult.add("matches", this_matches);
maxMatches = Math.max(maxMatches, this_matches);
// TODO: refactor this
if (groupCommand instanceof Grouping.CommandQuery) {
TopDocs topDocs = ((FilterCollector)gcollector).getTopFieldCollector().topDocs(0, groupCommand.docsPerGroup);
// TODO: refactor
//topDocs.totalHits
int ids[] = new int[topDocs.scoreDocs.length];
float[] scores = needScores ? new float[topDocs.scoreDocs.length] : null;
for (int i=0; i<ids.length; i++) {
ids[i] = topDocs.scoreDocs[i].doc;
if (scores != null)
scores[i] = topDocs.scoreDocs[i].score;
}
float score = topDocs.getMaxScore();
maxScore = Math.max(maxScore, score);
DocSlice docs = new DocSlice(0, ids.length, ids, scores, topDocs.totalHits, score);
groupResult.add("doclist", docs);
if (getDocList) {
for (int id : ids)
idSet.add(id);
}
continue;
}
Grouping.CommandFunc groupCommandFunc = (Grouping.CommandFunc)groupCommand;
TopGroupCollector collector = (TopGroupCollector)gcollector;
Phase2GroupCollector collector2 = phase2Collectors.get(cmdnum);
if (collector.orderedGroups == null) collector.buildSet();
List groupList = new ArrayList();
groupResult.add("groups", groupList); // grouped={ key={ groups=[

View File

@ -23,14 +23,16 @@ import org.apache.solr.common.util.StrUtils;
import java.io.StringReader;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class JSONTestUtil {
public static String match(String input, String pathAndExpected) throws Exception {
int pos = pathAndExpected.indexOf(':');
int pos = pathAndExpected.indexOf("==");
String path = pos>=0 ? pathAndExpected.substring(0,pos) : null;
String expected = pos>=0 ? pathAndExpected.substring(pos+1) : pathAndExpected;
String expected = pos>=0 ? pathAndExpected.substring(pos+2) : pathAndExpected;
return match(path, input, expected);
}

View File

@ -168,10 +168,10 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
);
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id")
,"/responseHeader/status:0" // exact match
,"/responseHeader:{'_SKIP_':'QTime', 'status':0}" // partial match by skipping some elements
,"/responseHeader:{'_MATCH_':'status', 'status':0}" // partial match by only including some elements
,"/grouped:{'foo_i':{'matches':10,'groups':[\n" +
,"/responseHeader/status==0" // exact match
,"/responseHeader=={'_SKIP_':'QTime', 'status':0}" // partial match by skipping some elements
,"/responseHeader=={'_MATCH_':'status', 'status':0}" // partial match by only including some elements
,"/grouped=={'"+f+"':{'matches':10,'groups':[\n" +
"{'groupValue':1,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," +
"{'groupValue':3,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}," +
"{'groupValue':2,'doclist':{'numFound':3,'start':0,'docs':[{'id':'4'}]}}," +
@ -182,7 +182,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
// test limiting the number of groups returned
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2")
,"/grouped:{'foo_i':{'matches':10,'groups':[" +
,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
"{'groupValue':1,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," +
"{'groupValue':3,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}" +
"]}}"
@ -190,7 +190,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
// test increasing the docs per group returned
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "group.limit","3")
,"/grouped:{'foo_i':{'matches':10,'groups':[" +
,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
"{'groupValue':1,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'},{'id':'10'},{'id':'5'}]}}," +
"{'groupValue':3,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'6'}]}}" +
"]}}"
@ -198,7 +198,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
// test adding in scores
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id,score", "rows","2", "group.limit","2", "indent","off")
,"/grouped/foo_i/groups:" +
,"/grouped/"+f+"/groups==" +
"[" +
"{'groupValue':1,'doclist':{'numFound':3,'start':0,'maxScore':10.0,'docs':[{'id':'8','score':10.0},{'id':'10','score':3.0}]}}," +
"{'groupValue':3,'doclist':{'numFound':2,'start':0,'maxScore':7.0,'docs':[{'id':'3','score':7.0},{'id':'6','score':2.0}]}}" +
@ -209,7 +209,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
// test function (functions are currently all float - this may change)
String func = "add("+f+","+f+")";
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.func", func , "fl","id", "rows","2")
,"/grouped:{'"+func+"':{'matches':10,'groups':[" +
,"/grouped=={'"+func+"':{'matches':10,'groups':[" +
"{'groupValue':2.0,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," +
"{'groupValue':6.0,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}" +
"]}}"
@ -218,26 +218,47 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
// test that faceting works with grouping
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id"
,"facet","true", "facet.field",f)
,"/grouped/foo_i/matches:10:"
,"/facet_counts/facet_fields/"+f+":['1',3, '2',3, '3',2, '4',1, '5',1]"
,"/grouped/"+f+"/matches==10"
,"/facet_counts/facet_fields/"+f+"==['1',3, '2',3, '3',2, '4',1, '5',1]"
);
purgeFieldCache(FieldCache.DEFAULT); // avoid FC insanity
// test that grouping works with highlighting
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id"
,"hl","true", "hl.fl",f)
,"/grouped/foo_i/matches:10:"
,"/highlighting:{'_ORDERED_':'', '8':{},'3':{},'4':{},'1':{},'2':{}}"
,"/grouped/"+f+"/matches==10"
,"/highlighting=={'_ORDERED_':'', '8':{},'3':{},'4':{},'1':{},'2':{}}"
);
// test that grouping works with debugging
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id"
,"debugQuery","true")
,"/grouped/foo_i/matches:10:"
,"/debug/explain/8:"
,"/debug/explain/2:"
,"/grouped/"+f+"/matches==10"
,"/debug/explain/8=="
,"/debug/explain/2=="
);
///////////////////////// group.query
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "group.limit","3")
,"/grouped=={'id:[2 TO 5]':{'matches':10," +
"'doclist':{'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'},{'id':'2'}]}}}"
);
// multiple at once
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true",
"group.query","id:[2 TO 5]",
"group.query","id:[5 TO 5]",
"group.field",f,
"rows","1",
"fl","id", "group.limit","2")
,"/grouped/id:[2 TO 5]=={'matches':10,'doclist':{'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'}]}}"
,"/grouped/id:[5 TO 5]=={'matches':10,'doclist':{'numFound':1,'start':0,'docs':[{'id':'5'}]}}"
,"/grouped/"+f+"=={'matches':10,'groups':[{'groupValue':1,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'},{'id':'10'}]}}]}"
);
};
}