mirror of https://github.com/apache/lucene.git
SOLR-2123: group by query
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@998707 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c84bd2f1ec
commit
3944f43cc4
|
@ -224,7 +224,22 @@ public class QueryComponent extends SearchComponent
|
|||
gc.groupBy = new QueryValueSource(q, 0.0f);
|
||||
}
|
||||
gc.key = groupByStr;
|
||||
gc.groupLimit = limitDefault;
|
||||
gc.numGroups = limitDefault;
|
||||
gc.docsPerGroup = docsPerGroupDefault;
|
||||
|
||||
cmd.groupCommands.add(gc);
|
||||
}
|
||||
}
|
||||
|
||||
if (queries != null) {
|
||||
for (String groupByStr : queries) {
|
||||
QParser parser = QParser.getParser(groupByStr, null, rb.req);
|
||||
Query gq = parser.getQuery();
|
||||
Grouping.CommandQuery gc = new Grouping.CommandQuery();
|
||||
gc.query = gq;
|
||||
gc.groupSort = groupSort;
|
||||
gc.key = groupByStr;
|
||||
gc.numGroups = limitDefault;
|
||||
gc.docsPerGroup = docsPerGroupDefault;
|
||||
|
||||
cmd.groupCommands.add(gc);
|
||||
|
|
|
@ -30,8 +30,8 @@ public class Grouping {
|
|||
public static class Command {
|
||||
public String key; // the name to use for this group in the response
|
||||
public Sort groupSort; // the sort of the documents *within* a single group.
|
||||
public int groupLimit; // how many groups - defaults to the "rows" parameter
|
||||
public int docsPerGroup; // how many docs in each group - from "group.limit" param, default=1
|
||||
public int numGroups; // how many groups - defaults to the "rows" parameter
|
||||
}
|
||||
|
||||
public static class CommandQuery extends Command {
|
||||
|
@ -73,10 +73,60 @@ class SearchGroup {
|
|||
***/
|
||||
}
|
||||
|
||||
abstract class GroupCollector extends Collector {
|
||||
/** get the number of matches before grouping or limiting have been applied */
|
||||
public abstract int getMatches();
|
||||
}
|
||||
|
||||
class FilterCollector extends GroupCollector {
|
||||
private final DocSet filter;
|
||||
private final TopFieldCollector collector;
|
||||
private int docBase;
|
||||
private int matches;
|
||||
|
||||
public FilterCollector(DocSet filter, TopFieldCollector collector) throws IOException {
|
||||
this.filter = filter;
|
||||
this.collector = collector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
collector.setScorer(scorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
matches++;
|
||||
if (filter.exists(doc + docBase))
|
||||
collector.collect(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(IndexReader reader, int docBase) throws IOException {
|
||||
this.docBase = docBase;
|
||||
collector.setNextReader(reader, docBase);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return collector.acceptsDocsOutOfOrder();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMatches() {
|
||||
return matches;
|
||||
}
|
||||
|
||||
TopFieldCollector getTopFieldCollector() {
|
||||
return collector;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/** Finds the top set of groups, grouped by groupByVS when sort == group.sort */
|
||||
class TopGroupCollector extends Collector {
|
||||
class TopGroupCollector extends GroupCollector {
|
||||
final int nGroups;
|
||||
final HashMap<MutableValue, SearchGroup> groupMap;
|
||||
TreeSet<SearchGroup> orderedGroups;
|
||||
|
@ -261,6 +311,7 @@ class TopGroupCollector extends Collector {
|
|||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMatches() {
|
||||
return matches;
|
||||
}
|
||||
|
|
|
@ -921,8 +921,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
Sort sort = cmd.getSort();
|
||||
if (sort == null) sort = new Sort();
|
||||
|
||||
// TODO: make this a generic collector list
|
||||
List<TopGroupCollector> collectors = new ArrayList<TopGroupCollector>(cmd.groupCommands.size());
|
||||
List<GroupCollector> collectors = new ArrayList<GroupCollector>(cmd.groupCommands.size());
|
||||
for (Grouping.Command groupCommand : cmd.groupCommands) {
|
||||
// TODO: perhaps use some methods rather than instanceof
|
||||
if (groupCommand instanceof Grouping.CommandFunc) {
|
||||
|
@ -941,6 +940,12 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
gc.context = context;
|
||||
gc.collector = collector;
|
||||
}
|
||||
|
||||
if (groupCommand instanceof Grouping.CommandQuery) {
|
||||
DocSet groupFilt = getDocSet(((Grouping.CommandQuery)groupCommand).query);
|
||||
TopFieldCollector collector = TopFieldCollector.create(groupCommand.groupSort==null ? sort : groupCommand.groupSort, groupCommand.docsPerGroup, false, needScores, needScores, true);
|
||||
collectors.add(new FilterCollector(groupFilt, collector));
|
||||
}
|
||||
}
|
||||
|
||||
Collector allCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
|
||||
|
@ -958,6 +963,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
}
|
||||
|
||||
// TODO: make this a generic collector list
|
||||
int numPhase2 = 0;
|
||||
List<Phase2GroupCollector> phase2Collectors = new ArrayList<Phase2GroupCollector>(cmd.groupCommands.size());
|
||||
for (Grouping.Command groupCommand : cmd.groupCommands) {
|
||||
if (groupCommand instanceof Grouping.CommandFunc) {
|
||||
|
@ -965,10 +971,16 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
Sort collectorSort = gc.groupSort == null ? sort : gc.groupSort;
|
||||
Phase2GroupCollector collector = new Phase2GroupCollector((TopGroupCollector)gc.collector, gc.groupBy, gc.context, collectorSort, gc.docsPerGroup, needScores);
|
||||
phase2Collectors.add(collector);
|
||||
numPhase2++;
|
||||
} else if (groupCommand instanceof Grouping.CommandQuery) {
|
||||
phase2Collectors.add(null);
|
||||
} else {
|
||||
phase2Collectors.add(null);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: optionally cache docs and feed them back through rather than re-searching
|
||||
if (numPhase2 > 0)
|
||||
search(query, luceneFilter, MultiCollector.wrap(phase2Collectors.toArray(new Collector[phase2Collectors.size()])));
|
||||
|
||||
Set<Integer> idSet = new LinkedHashSet<Integer>(); // used for tracking unique docs when we need a doclist
|
||||
|
@ -978,19 +990,49 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
NamedList grouped = new SimpleOrderedMap();
|
||||
for (int cmdnum=0; cmdnum<cmd.groupCommands.size(); cmdnum++) {
|
||||
Grouping.Command groupCommand = cmd.groupCommands.get(cmdnum);
|
||||
Grouping.CommandFunc groupCommandFunc = (Grouping.CommandFunc)groupCommand;
|
||||
TopGroupCollector collector = collectors.get(cmdnum);
|
||||
Phase2GroupCollector collector2 = phase2Collectors.get(cmdnum);
|
||||
|
||||
if (collector.orderedGroups == null) collector.buildSet();
|
||||
GroupCollector gcollector = (GroupCollector)collectors.get(cmdnum);
|
||||
|
||||
NamedList groupResult = new SimpleOrderedMap();
|
||||
grouped.add(groupCommand.key, groupResult); // grouped={ key={
|
||||
|
||||
int this_matches = collector.getMatches();
|
||||
int this_matches = gcollector.getMatches();
|
||||
groupResult.add("matches", this_matches);
|
||||
maxMatches = Math.max(maxMatches, this_matches);
|
||||
|
||||
// TODO: refactor this
|
||||
if (groupCommand instanceof Grouping.CommandQuery) {
|
||||
TopDocs topDocs = ((FilterCollector)gcollector).getTopFieldCollector().topDocs(0, groupCommand.docsPerGroup);
|
||||
|
||||
// TODO: refactor
|
||||
|
||||
//topDocs.totalHits
|
||||
int ids[] = new int[topDocs.scoreDocs.length];
|
||||
float[] scores = needScores ? new float[topDocs.scoreDocs.length] : null;
|
||||
for (int i=0; i<ids.length; i++) {
|
||||
ids[i] = topDocs.scoreDocs[i].doc;
|
||||
if (scores != null)
|
||||
scores[i] = topDocs.scoreDocs[i].score;
|
||||
}
|
||||
|
||||
float score = topDocs.getMaxScore();
|
||||
maxScore = Math.max(maxScore, score);
|
||||
DocSlice docs = new DocSlice(0, ids.length, ids, scores, topDocs.totalHits, score);
|
||||
groupResult.add("doclist", docs);
|
||||
|
||||
if (getDocList) {
|
||||
for (int id : ids)
|
||||
idSet.add(id);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
Grouping.CommandFunc groupCommandFunc = (Grouping.CommandFunc)groupCommand;
|
||||
TopGroupCollector collector = (TopGroupCollector)gcollector;
|
||||
Phase2GroupCollector collector2 = phase2Collectors.get(cmdnum);
|
||||
|
||||
if (collector.orderedGroups == null) collector.buildSet();
|
||||
|
||||
List groupList = new ArrayList();
|
||||
groupResult.add("groups", groupList); // grouped={ key={ groups=[
|
||||
|
||||
|
|
|
@ -23,14 +23,16 @@ import org.apache.solr.common.util.StrUtils;
|
|||
|
||||
import java.io.StringReader;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
||||
public class JSONTestUtil {
|
||||
|
||||
public static String match(String input, String pathAndExpected) throws Exception {
|
||||
int pos = pathAndExpected.indexOf(':');
|
||||
int pos = pathAndExpected.indexOf("==");
|
||||
String path = pos>=0 ? pathAndExpected.substring(0,pos) : null;
|
||||
String expected = pos>=0 ? pathAndExpected.substring(pos+1) : pathAndExpected;
|
||||
String expected = pos>=0 ? pathAndExpected.substring(pos+2) : pathAndExpected;
|
||||
return match(path, input, expected);
|
||||
}
|
||||
|
||||
|
|
|
@ -168,10 +168,10 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
|
|||
);
|
||||
|
||||
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id")
|
||||
,"/responseHeader/status:0" // exact match
|
||||
,"/responseHeader:{'_SKIP_':'QTime', 'status':0}" // partial match by skipping some elements
|
||||
,"/responseHeader:{'_MATCH_':'status', 'status':0}" // partial match by only including some elements
|
||||
,"/grouped:{'foo_i':{'matches':10,'groups':[\n" +
|
||||
,"/responseHeader/status==0" // exact match
|
||||
,"/responseHeader=={'_SKIP_':'QTime', 'status':0}" // partial match by skipping some elements
|
||||
,"/responseHeader=={'_MATCH_':'status', 'status':0}" // partial match by only including some elements
|
||||
,"/grouped=={'"+f+"':{'matches':10,'groups':[\n" +
|
||||
"{'groupValue':1,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," +
|
||||
"{'groupValue':3,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}," +
|
||||
"{'groupValue':2,'doclist':{'numFound':3,'start':0,'docs':[{'id':'4'}]}}," +
|
||||
|
@ -182,7 +182,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
|
|||
|
||||
// test limiting the number of groups returned
|
||||
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2")
|
||||
,"/grouped:{'foo_i':{'matches':10,'groups':[" +
|
||||
,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
|
||||
"{'groupValue':1,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," +
|
||||
"{'groupValue':3,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}" +
|
||||
"]}}"
|
||||
|
@ -190,7 +190,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
|
|||
|
||||
// test increasing the docs per group returned
|
||||
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "group.limit","3")
|
||||
,"/grouped:{'foo_i':{'matches':10,'groups':[" +
|
||||
,"/grouped=={'"+f+"':{'matches':10,'groups':[" +
|
||||
"{'groupValue':1,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'},{'id':'10'},{'id':'5'}]}}," +
|
||||
"{'groupValue':3,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'6'}]}}" +
|
||||
"]}}"
|
||||
|
@ -198,7 +198,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
|
|||
|
||||
// test adding in scores
|
||||
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id,score", "rows","2", "group.limit","2", "indent","off")
|
||||
,"/grouped/foo_i/groups:" +
|
||||
,"/grouped/"+f+"/groups==" +
|
||||
"[" +
|
||||
"{'groupValue':1,'doclist':{'numFound':3,'start':0,'maxScore':10.0,'docs':[{'id':'8','score':10.0},{'id':'10','score':3.0}]}}," +
|
||||
"{'groupValue':3,'doclist':{'numFound':2,'start':0,'maxScore':7.0,'docs':[{'id':'3','score':7.0},{'id':'6','score':2.0}]}}" +
|
||||
|
@ -209,7 +209,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
|
|||
// test function (functions are currently all float - this may change)
|
||||
String func = "add("+f+","+f+")";
|
||||
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.func", func , "fl","id", "rows","2")
|
||||
,"/grouped:{'"+func+"':{'matches':10,'groups':[" +
|
||||
,"/grouped=={'"+func+"':{'matches':10,'groups':[" +
|
||||
"{'groupValue':2.0,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'}]}}," +
|
||||
"{'groupValue':6.0,'doclist':{'numFound':2,'start':0,'docs':[{'id':'3'}]}}" +
|
||||
"]}}"
|
||||
|
@ -218,26 +218,47 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
|
|||
// test that faceting works with grouping
|
||||
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id"
|
||||
,"facet","true", "facet.field",f)
|
||||
,"/grouped/foo_i/matches:10:"
|
||||
,"/facet_counts/facet_fields/"+f+":['1',3, '2',3, '3',2, '4',1, '5',1]"
|
||||
,"/grouped/"+f+"/matches==10"
|
||||
,"/facet_counts/facet_fields/"+f+"==['1',3, '2',3, '3',2, '4',1, '5',1]"
|
||||
);
|
||||
purgeFieldCache(FieldCache.DEFAULT); // avoid FC insanity
|
||||
|
||||
// test that grouping works with highlighting
|
||||
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id"
|
||||
,"hl","true", "hl.fl",f)
|
||||
,"/grouped/foo_i/matches:10:"
|
||||
,"/highlighting:{'_ORDERED_':'', '8':{},'3':{},'4':{},'1':{},'2':{}}"
|
||||
,"/grouped/"+f+"/matches==10"
|
||||
,"/highlighting=={'_ORDERED_':'', '8':{},'3':{},'4':{},'1':{},'2':{}}"
|
||||
);
|
||||
|
||||
// test that grouping works with debugging
|
||||
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id"
|
||||
,"debugQuery","true")
|
||||
,"/grouped/foo_i/matches:10:"
|
||||
,"/debug/explain/8:"
|
||||
,"/debug/explain/2:"
|
||||
,"/grouped/"+f+"/matches==10"
|
||||
,"/debug/explain/8=="
|
||||
,"/debug/explain/2=="
|
||||
);
|
||||
|
||||
///////////////////////// group.query
|
||||
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "group.limit","3")
|
||||
,"/grouped=={'id:[2 TO 5]':{'matches':10," +
|
||||
"'doclist':{'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'},{'id':'2'}]}}}"
|
||||
);
|
||||
|
||||
// multiple at once
|
||||
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true",
|
||||
"group.query","id:[2 TO 5]",
|
||||
"group.query","id:[5 TO 5]",
|
||||
"group.field",f,
|
||||
"rows","1",
|
||||
"fl","id", "group.limit","2")
|
||||
,"/grouped/id:[2 TO 5]=={'matches':10,'doclist':{'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'}]}}"
|
||||
,"/grouped/id:[5 TO 5]=={'matches':10,'doclist':{'numFound':1,'start':0,'docs':[{'id':'5'}]}}"
|
||||
,"/grouped/"+f+"=={'matches':10,'groups':[{'groupValue':1,'doclist':{'numFound':3,'start':0,'docs':[{'id':'8'},{'id':'10'}]}}]}"
|
||||
);
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue