SOLR-2063: simple flattened grouped response as main result list

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1038779 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2010-11-24 19:34:14 +00:00
parent a1678b7d81
commit 897ec3a8ef
4 changed files with 147 additions and 6 deletions

View File

@ -32,5 +32,8 @@ public interface GroupParams {
public static final String GROUP_LIMIT = GROUP + ".limit"; public static final String GROUP_LIMIT = GROUP + ".limit";
/** the offset for the doclist of each group */ /** the offset for the doclist of each group */
public static final String GROUP_OFFSET = GROUP + ".offset"; public static final String GROUP_OFFSET = GROUP + ".offset";
/** treat the first group result as the main result. true/false */
public static final String GROUP_MAIN = GROUP + ".main";
} }

View File

@ -306,6 +306,7 @@ public class QueryComponent extends SearchComponent
String[] funcs = params.getParams(GroupParams.GROUP_FUNC); String[] funcs = params.getParams(GroupParams.GROUP_FUNC);
String[] queries = params.getParams(GroupParams.GROUP_QUERY); String[] queries = params.getParams(GroupParams.GROUP_QUERY);
String groupSortStr = params.get(GroupParams.GROUP_SORT); String groupSortStr = params.get(GroupParams.GROUP_SORT);
boolean main = params.getBool(GroupParams.GROUP_MAIN, false);
// groupSort defaults to sort // groupSort defaults to sort
Sort groupSort = groupSortStr == null ? cmd.getSort() : QueryParsing.parseSort(groupSortStr, req); Sort groupSort = groupSortStr == null ? cmd.getSort() : QueryParsing.parseSort(groupSortStr, req);
@ -345,6 +346,12 @@ public class QueryComponent extends SearchComponent
gc.offset = cmd.getOffset(); gc.offset = cmd.getOffset();
gc.sort = cmd.getSort(); gc.sort = cmd.getSort();
if (main) {
gc.main = true;
main = false;
gc.groupOffset = 0; // doesn't make sense
}
grouping.add(gc); grouping.add(gc);
} }
} }
@ -361,6 +368,17 @@ public class QueryComponent extends SearchComponent
gc.docsPerGroup = docsPerGroupDefault; gc.docsPerGroup = docsPerGroupDefault;
gc.groupOffset = groupOffsetDefault; gc.groupOffset = groupOffsetDefault;
// these two params will only be used if this is for the main result set
gc.offset = cmd.getOffset();
gc.numGroups = limitDefault;
if (main) {
gc.main = true;
main = false;
gc.docsPerGroup = gc.numGroups; // doesn't make sense to limit to one
gc.groupOffset = gc.offset;
}
grouping.add(gc); grouping.add(gc);
} }
} }
@ -376,6 +394,12 @@ public class QueryComponent extends SearchComponent
rb.setResult( result ); rb.setResult( result );
rsp.add("grouped", result.groupedResults); rsp.add("grouped", result.groupedResults);
// TODO: get "hits" a different way to log // TODO: get "hits" a different way to log
if (grouping.mainResult != null) {
rsp.add("response",grouping.mainResult);
rsp.getToLog().add("hits", grouping.mainResult.matches());
}
return; return;
} catch (ParseException e) { } catch (ParseException e) {

View File

@ -33,6 +33,7 @@ import java.util.*;
public class Grouping { public class Grouping {
public enum Format {Grouped, Simple}
public abstract class Command { public abstract class Command {
public String key; // the name to use for this group in the response public String key; // the name to use for this group in the response
@ -42,6 +43,8 @@ public class Grouping {
public int groupOffset; // the offset within each group (for paging within each group) public int groupOffset; // the offset within each group (for paging within each group)
public int numGroups; // how many groups - defaults to the "rows" parameter public int numGroups; // how many groups - defaults to the "rows" parameter
public int offset; // offset into the list of groups public int offset; // offset into the list of groups
public Format format;
public boolean main; // use as the main result in simple format (grouped.main=true param)
abstract void prepare() throws IOException; abstract void prepare() throws IOException;
@ -65,7 +68,13 @@ public class Grouping {
DocList getDocList(TopDocsCollector collector) { DocList getDocList(TopDocsCollector collector) {
int max = collector.getTotalHits(); int max = collector.getTotalHits();
int docsToCollect = getMax(groupOffset, docsPerGroup, max); int off = groupOffset;
int len = docsPerGroup;
if (main) {
off = offset;
len = numGroups;
}
int docsToCollect = getMax(off, len, max);
// TODO: implement a DocList impl that doesn't need to start at offset=0 // TODO: implement a DocList impl that doesn't need to start at offset=0
TopDocs topDocs = collector.topDocs(0, docsToCollect); TopDocs topDocs = collector.topDocs(0, docsToCollect);
@ -80,7 +89,7 @@ public class Grouping {
float score = topDocs.getMaxScore(); float score = topDocs.getMaxScore();
maxScore = Math.max(maxScore, score); maxScore = Math.max(maxScore, score);
DocSlice docs = new DocSlice(groupOffset, Math.max(0, ids.length - groupOffset), ids, scores, topDocs.totalHits, score); DocSlice docs = new DocSlice(off, Math.max(0, ids.length - off), ids, scores, topDocs.totalHits, score);
if (getDocList) { if (getDocList) {
DocIterator iter = docs.iterator(); DocIterator iter = docs.iterator();
@ -116,8 +125,12 @@ public class Grouping {
@Override @Override
void finish() throws IOException { void finish() throws IOException {
NamedList rsp = commonResponse(); if (main) {
addDocList(rsp, (TopDocsCollector)collector.getCollector()); mainResult = getDocList((TopDocsCollector)collector.getCollector());
} else {
NamedList rsp = commonResponse();
addDocList(rsp, (TopDocsCollector)collector.getCollector());
}
} }
@Override @Override
@ -168,16 +181,24 @@ public class Grouping {
int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc); int docsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
docsToCollect = Math.max(docsToCollect, 1); docsToCollect = Math.max(docsToCollect, 1);
// if this for the main result, don't skip groups (since we are counting docs, not groups)
int collectorOffset = main ? 0 : offset;
if (groupBy instanceof StrFieldSource) { if (groupBy instanceof StrFieldSource) {
collector2 = new Phase2StringGroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset); collector2 = new Phase2StringGroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, collectorOffset);
} else { } else {
collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, offset); collector2 = new Phase2GroupCollector(collector, groupBy, context, groupSort, docsToCollect, needScores, collectorOffset);
} }
return collector2; return collector2;
} }
@Override @Override
void finish() throws IOException { void finish() throws IOException {
if (main) {
createMainResponse();
return;
}
NamedList groupResult = commonResponse(); NamedList groupResult = commonResponse();
List groupList = new ArrayList(); List groupList = new ArrayList();
@ -204,6 +225,57 @@ public class Grouping {
} }
} }
private void createMainResponse() {
int docCount = numGroups;
int docOffset = offset;
int docsToGather = getMax(docOffset, docCount, maxDoc);
float maxScore = Float.NEGATIVE_INFINITY;
List<TopDocs> topDocsList = new ArrayList<TopDocs>();
int numDocs = 0;
for (SearchGroup group : collector.orderedGroups) {
SearchGroupDocs groupDocs = collector2.groupMap.get(group.groupValue);
TopDocsCollector collector = groupDocs.collector;
int hits = collector.getTotalHits();
int num = Math.min(docsPerGroup, hits - groupOffset); // how many docs are in this group
if (num <= 0) continue;
TopDocs topDocs = collector.topDocs(groupOffset, Math.min(docsPerGroup,docsToGather-numDocs));
topDocsList.add(topDocs);
numDocs += topDocs.scoreDocs.length;
float score = topDocs.getMaxScore();
maxScore = Math.max(maxScore, score);
if (numDocs >= docsToGather) break;
}
assert numDocs <= docCount; // make sure we didn't gather too many
int[] ids = new int[numDocs];
float[] scores = needScores ? new float[numDocs] : null;
int pos = 0;
for (TopDocs topDocs : topDocsList) {
for (ScoreDoc sd : topDocs.scoreDocs) {
ids[pos] = sd.doc;
if (scores != null) scores[pos] = sd.score;
pos++;
}
}
DocSlice docs = new DocSlice(docOffset, Math.max(0, ids.length - docOffset), ids, scores, getMatches(), maxScore);
if (getDocList) {
DocIterator iter = docs.iterator();
while (iter.hasNext())
idSet.add(iter.nextDoc());
}
mainResult = docs;
}
@Override @Override
int getMatches() { int getMatches() {
return collector.getMatches(); return collector.getMatches();
@ -243,6 +315,8 @@ public class Grouping {
final SolrIndexSearcher.QueryCommand cmd; final SolrIndexSearcher.QueryCommand cmd;
final List<Command> commands = new ArrayList<Command>(); final List<Command> commands = new ArrayList<Command>();
public DocList mainResult; // output if one of the grouping commands should be used as the main result.
public Grouping(SolrIndexSearcher searcher, SolrIndexSearcher.QueryResult qr, SolrIndexSearcher.QueryCommand cmd) { public Grouping(SolrIndexSearcher searcher, SolrIndexSearcher.QueryResult qr, SolrIndexSearcher.QueryCommand cmd) {
this.searcher = searcher; this.searcher = searcher;
this.qr = qr; this.qr = qr;

View File

@ -291,6 +291,22 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
"'doclist':{'numFound':4,'start':10,'docs':[]}}}" "'doclist':{'numFound':4,'start':10,'docs':[]}}}"
); );
///////////////////////// group.query as main result
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "rows","3", "group.main","true")
,"/response=={'numFound':4,'start':0,'docs':[{'id':'3'},{'id':'4'},{'id':'2'}]}"
);
// group.query and offset
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "rows","3", "start","2", "group.main","true")
,"/response=={'numFound':4,'start':2,'docs':[{'id':'2'},{'id':'5'}]}"
);
// group.query and big offset
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.query","id:[2 TO 5]", "fl","id", "rows","3", "start","10", "group.main","true")
,"/response=={'numFound':4,'start':10,'docs':[]}"
);
// multiple at once // multiple at once
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true",
"group.query","id:[2 TO 5]", "group.query","id:[2 TO 5]",
@ -304,6 +320,30 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
); );
///////////////////////// group.field as main result
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "group.main","true")
,"/response=={'numFound':10,'start':0,'docs':[{'id':'8'},{'id':'3'},{'id':'4'},{'id':'1'},{'id':'2'}]}"
);
// test that rows limits #docs
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "group.main","true")
,"/response=={'numFound':10,'start':0,'docs':[{'id':'8'},{'id':'3'},{'id':'4'}]}"
);
// small offset
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "start","1", "group.main","true")
,"/response=={'numFound':10,'start':1,'docs':[{'id':'3'},{'id':'4'}]}"
);
// large offset
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","2", "start","20", "group.main","true")
,"/response=={'numFound':10,'start':20,'docs':[]}"
);
// group.limit>1
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "group.limit","2", "group.main","true")
,"/response=={'numFound':10,'start':0,'docs':[{'id':'8'},{'id':'10'},{'id':'3'}]}"
);
// group.limit>1 with start>0
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "start","1", "group.limit","2", "group.main","true")
,"/response=={'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}"
);
}; };