SOLR-6323: ReRankingQParserPlugin should handle paging beyond number reranked

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1624303 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Joel Bernstein 2014-09-11 14:34:44 +00:00
parent 2778a0c507
commit 22986445fe
2 changed files with 341 additions and 28 deletions

View File

@ -40,7 +40,8 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.ScoreDoc;
import com.carrotsearch.hppc.IntFloatOpenHashMap;
@ -49,6 +50,7 @@ import org.apache.solr.request.SolrRequestInfo;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.Arrays;
import java.util.Comparator;
@ -83,15 +85,14 @@ public class ReRankQParserPlugin extends QParserPlugin {
Query reRankQuery = reRankParser.parse();
int reRankDocs = localParams.getInt("reRankDocs", 200);
reRankDocs = Math.max(1, reRankDocs); //
double reRankWeight = localParams.getDouble("reRankWeight",2.0d);
int start = params.getInt(CommonParams.START,0);
int rows = params.getInt(CommonParams.ROWS,10);
// This enusres that reRankDocs >= docs needed to satisfy the result set.
reRankDocs = Math.max(start+rows, reRankDocs);
return new ReRankQuery(reRankQuery, reRankDocs, reRankWeight);
int length = start+rows;
return new ReRankQuery(reRankQuery, reRankDocs, reRankWeight, length);
}
}
@ -99,6 +100,7 @@ public class ReRankQParserPlugin extends QParserPlugin {
private Query mainQuery = defaultQuery;
private Query reRankQuery;
private int reRankDocs;
private int length;
private double reRankWeight;
private Map<BytesRef, Integer> boostedPriority;
@ -118,10 +120,11 @@ public class ReRankQParserPlugin extends QParserPlugin {
return false;
}
public ReRankQuery(Query reRankQuery, int reRankDocs, double reRankWeight) {
public ReRankQuery(Query reRankQuery, int reRankDocs, double reRankWeight, int length) {
this.reRankQuery = reRankQuery;
this.reRankDocs = reRankDocs;
this.reRankWeight = reRankWeight;
this.length = length;
}
public RankQuery wrap(Query _mainQuery) {
@ -145,7 +148,7 @@ public class ReRankQParserPlugin extends QParserPlugin {
}
}
return new ReRankCollector(reRankDocs, reRankQuery, reRankWeight, cmd, searcher, boostedPriority);
return new ReRankCollector(reRankDocs, length, reRankQuery, reRankWeight, cmd, searcher, boostedPriority);
}
public String toString(String s) {
@ -159,6 +162,16 @@ public class ReRankQParserPlugin extends QParserPlugin {
return toString(null);
}
public Query rewrite(IndexReader reader) throws IOException {
return wrap(this.mainQuery.rewrite(reader));
}
public void extractTerms(Set<Term> terms) {
this.mainQuery.extractTerms(terms);
}
public Weight createWeight(IndexSearcher searcher) throws IOException{
return new ReRankWeight(mainQuery, reRankQuery, reRankWeight, searcher);
}
@ -214,10 +227,13 @@ public class ReRankQParserPlugin extends QParserPlugin {
private TopDocsCollector mainCollector;
private IndexSearcher searcher;
private int reRankDocs;
private int length;
private double reRankWeight;
private Map<BytesRef, Integer> boostedPriority;
public ReRankCollector(int reRankDocs,
int length,
Query reRankQuery,
double reRankWeight,
SolrIndexSearcher.QueryCommand cmd,
@ -226,13 +242,14 @@ public class ReRankQParserPlugin extends QParserPlugin {
super(null);
this.reRankQuery = reRankQuery;
this.reRankDocs = reRankDocs;
this.length = length;
this.boostedPriority = boostedPriority;
Sort sort = cmd.getSort();
if(sort == null) {
this.mainCollector = TopScoreDocCollector.create(this.reRankDocs,true);
this.mainCollector = TopScoreDocCollector.create(Math.max(this.reRankDocs, length),true);
} else {
sort = sort.rewrite(searcher);
this.mainCollector = TopFieldCollector.create(sort, this.reRankDocs, false, true, true, true);
this.mainCollector = TopFieldCollector.create(sort, Math.max(this.reRankDocs, length), false, true, true, true);
}
this.searcher = searcher;
this.reRankWeight = reRankWeight;
@ -259,8 +276,14 @@ public class ReRankQParserPlugin extends QParserPlugin {
}
public TopDocs topDocs(int start, int howMany) {
try {
TopDocs mainDocs = mainCollector.topDocs(0, reRankDocs);
TopDocs mainDocs = mainCollector.topDocs(0, Math.max(reRankDocs, length));
if(mainDocs.totalHits == 0 || mainDocs.scoreDocs.length == 0) {
return mainDocs;
}
if(boostedPriority != null) {
SolrRequestInfo info = SolrRequestInfo.getRequestInfo();
@ -271,6 +294,12 @@ public class ReRankQParserPlugin extends QParserPlugin {
IntIntOpenHashMap boostedDocs = QueryElevationComponent.getBoostDocs((SolrIndexSearcher)searcher, boostedPriority, requestContext);
ScoreDoc[] mainScoreDocs = mainDocs.scoreDocs;
ScoreDoc[] reRankScoreDocs = new ScoreDoc[Math.min(mainScoreDocs.length, reRankDocs)];
System.arraycopy(mainScoreDocs,0,reRankScoreDocs,0,reRankScoreDocs.length);
mainDocs.scoreDocs = reRankScoreDocs;
TopDocs rescoredDocs = new QueryRescorer(reRankQuery) {
@Override
protected float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore) {
@ -280,20 +309,47 @@ public class ReRankQParserPlugin extends QParserPlugin {
}
return score;
}
}.rescore(searcher, mainDocs, reRankDocs);
}.rescore(searcher, mainDocs, mainDocs.scoreDocs.length);
Arrays.sort(rescoredDocs.scoreDocs, new BoostedComp(boostedDocs, mainDocs.scoreDocs, rescoredDocs.getMaxScore()));
if(howMany > rescoredDocs.scoreDocs.length) {
howMany = rescoredDocs.scoreDocs.length;
}
//Lower howMany if we've collected fewer documents.
howMany = Math.min(howMany, mainScoreDocs.length);
if(howMany == rescoredDocs.scoreDocs.length) {
return rescoredDocs; // Just return the rescoredDocs
} else if(howMany > rescoredDocs.scoreDocs.length) {
//We need to return more then we've reRanked, so create the combined page.
ScoreDoc[] scoreDocs = new ScoreDoc[howMany];
System.arraycopy(rescoredDocs.scoreDocs,0,scoreDocs,0,howMany);
System.arraycopy(mainScoreDocs, 0, scoreDocs, 0, scoreDocs.length); //lay down the initial docs
System.arraycopy(rescoredDocs.scoreDocs, 0, scoreDocs, 0, rescoredDocs.scoreDocs.length);//overlay the re-ranked docs.
rescoredDocs.scoreDocs = scoreDocs;
return rescoredDocs;
} else {
return new QueryRescorer(reRankQuery) {
//We've rescored more then we need to return.
ScoreDoc[] scoreDocs = new ScoreDoc[howMany];
System.arraycopy(rescoredDocs.scoreDocs, 0, scoreDocs, 0, howMany);
rescoredDocs.scoreDocs = scoreDocs;
return rescoredDocs;
}
} else {
ScoreDoc[] mainScoreDocs = mainDocs.scoreDocs;
/*
* Create the array for the reRankScoreDocs.
*/
ScoreDoc[] reRankScoreDocs = new ScoreDoc[Math.min(mainScoreDocs.length, reRankDocs)];
/*
* Copy the initial results into the reRankScoreDocs array.
*/
System.arraycopy(mainScoreDocs, 0, reRankScoreDocs, 0, reRankScoreDocs.length);
mainDocs.scoreDocs = reRankScoreDocs;
TopDocs rescoredDocs = new QueryRescorer(reRankQuery) {
@Override
protected float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore) {
float score = firstPassScore;
@ -302,9 +358,31 @@ public class ReRankQParserPlugin extends QParserPlugin {
}
return score;
}
}.rescore(searcher, mainDocs, howMany);
}
}.rescore(searcher, mainDocs, mainDocs.scoreDocs.length);
//Lower howMany to return if we've collected fewer documents.
howMany = Math.min(howMany, mainScoreDocs.length);
if(howMany == rescoredDocs.scoreDocs.length) {
return rescoredDocs; // Just return the rescoredDocs
} else if(howMany > rescoredDocs.scoreDocs.length) {
//We need to return more then we've reRanked, so create the combined page.
ScoreDoc[] scoreDocs = new ScoreDoc[howMany];
//lay down the initial docs
System.arraycopy(mainScoreDocs, 0, scoreDocs, 0, scoreDocs.length);
//overlay the rescoreds docs
System.arraycopy(rescoredDocs.scoreDocs, 0, scoreDocs, 0, rescoredDocs.scoreDocs.length);
rescoredDocs.scoreDocs = scoreDocs;
return rescoredDocs;
} else {
//We've rescored more then we need to return.
ScoreDoc[] scoreDocs = new ScoreDoc[howMany];
System.arraycopy(rescoredDocs.scoreDocs, 0, scoreDocs, 0, howMany);
rescoredDocs.scoreDocs = scoreDocs;
return rescoredDocs;
}
}
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}

View File

@ -48,6 +48,10 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
@Test
public void testReRankQueries() throws Exception {
assertU(delQ("*:*"));
assertU(commit());
String[] doc = {"id","1", "term_s", "YYYY", "group_s", "group1", "test_ti", "5", "test_tl", "10", "test_tf", "2000"};
assertU(adoc(doc));
assertU(commit());
@ -67,6 +71,9 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
assertU(adoc(doc5));
assertU(commit());
ModifiableSolrParams params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=200}");
params.add("q", "term_s:YYYY");
@ -231,9 +238,78 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
"//result/doc[6]/float[@name='id'][.='3.0']"
);
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=4 reRankWeight=2}");
params.add("q", "{!edismax bq=$bqq1}*:*");
params.add("bqq1", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50 id:6^60");
params.add("rqq", "test_ti:50^1000");
params.add("fl", "id,score");
params.add("start", "0");
params.add("rows", "10");
params.add("qt","/elevate");
params.add("elevateIds", "4,1");
assertQ(req(params), "*[count(//doc)=6]",
"//result/doc[1]/float[@name='id'][.='4.0']", //Elevated
"//result/doc[2]/float[@name='id'][.='1.0']", //Elevated
"//result/doc[3]/float[@name='id'][.='6.0']",
"//result/doc[4]/float[@name='id'][.='5.0']",
"//result/doc[5]/float[@name='id'][.='3.0']",
"//result/doc[6]/float[@name='id'][.='2.0']" //Not in reRankeDocs
);
//Test Elevation with start beyond the rerank docs
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=3 reRankWeight=2}");
params.add("q", "{!edismax bq=$bqq1}*:*");
params.add("bqq1", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50 id:6^60");
params.add("rqq", "test_ti:50^1000");
params.add("fl", "id,score");
params.add("start", "4");
params.add("rows", "10");
params.add("qt","/elevate");
params.add("elevateIds", "4,1");
assertQ(req(params), "*[count(//doc)=2]",
"//result/doc[1]/float[@name='id'][.='3.0']",
"//result/doc[2]/float[@name='id'][.='2.0']" //Was not in reRankDocs
);
//Test Elevation with zero results
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=3 reRankWeight=2}");
params.add("q", "{!edismax bq=$bqq1}nada");
params.add("bqq1", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50 id:6^60");
params.add("rqq", "test_ti:50^1000");
params.add("fl", "id,score");
params.add("start", "4");
params.add("rows", "10");
params.add("qt","/elevate");
params.add("elevateIds", "4,1");
assertQ(req(params), "*[count(//doc)=0]");
//Pass in reRankDocs lower then the length being collected.
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=1 reRankWeight=2}");
params.add("q", "{!edismax bq=$bqq1}*:*");
params.add("bqq1", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50 id:6^60");
params.add("rqq", "test_ti:50^1000");
params.add("fl", "id,score");
params.add("start", "0");
params.add("rows", "10");
assertQ(req(params), "*[count(//doc)=6]",
"//result/doc[1]/float[@name='id'][.='6.0']",
"//result/doc[2]/float[@name='id'][.='5.0']",
"//result/doc[3]/float[@name='id'][.='4.0']",
"//result/doc[4]/float[@name='id'][.='3.0']",
"//result/doc[5]/float[@name='id'][.='2.0']",
"//result/doc[6]/float[@name='id'][.='1.0']"
);
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=0 reRankWeight=2}");
params.add("q", "{!edismax bq=$bqq1}*:*");
@ -244,14 +320,31 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
params.add("rows", "10");
assertQ(req(params), "*[count(//doc)=6]",
"//result/doc[1]/float[@name='id'][.='2.0']",
"//result/doc[2]/float[@name='id'][.='6.0']",
"//result/doc[3]/float[@name='id'][.='5.0']",
"//result/doc[4]/float[@name='id'][.='4.0']",
"//result/doc[5]/float[@name='id'][.='3.0']",
"//result/doc[1]/float[@name='id'][.='6.0']",
"//result/doc[2]/float[@name='id'][.='5.0']",
"//result/doc[3]/float[@name='id'][.='4.0']",
"//result/doc[4]/float[@name='id'][.='3.0']",
"//result/doc[5]/float[@name='id'][.='2.0']",
"//result/doc[6]/float[@name='id'][.='1.0']"
);
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=2 reRankWeight=2}");
params.add("q", "{!edismax bq=$bqq1}*:*");
params.add("bqq1", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50 id:6^60");
params.add("rqq", "test_ti:4^1000");
params.add("fl", "id,score");
params.add("start", "0");
params.add("rows", "10");
assertQ(req(params), "*[count(//doc)=6]",
"//result/doc[1]/float[@name='id'][.='5.0']",
"//result/doc[2]/float[@name='id'][.='6.0']",
"//result/doc[3]/float[@name='id'][.='4.0']",
"//result/doc[4]/float[@name='id'][.='3.0']",
"//result/doc[5]/float[@name='id'][.='2.0']",
"//result/doc[6]/float[@name='id'][.='1.0']"
);
//Test reRankWeight of 0, reranking will have no effect.
params = new ModifiableSolrParams();
@ -271,11 +364,49 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
"//result/doc[5]/float[@name='id'][.='2.0']"
);
//Test range query
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=6}");
params.add("q", "test_ti:[0 TO 2000]");
params.add("rqq", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50 id:6^60");
params.add("fl", "id,score");
params.add("start", "0");
params.add("rows", "6");
assertQ(req(params), "*[count(//doc)=5]",
"//result/doc[1]/float[@name='id'][.='6.0']",
"//result/doc[2]/float[@name='id'][.='5.0']",
"//result/doc[3]/float[@name='id'][.='4.0']",
"//result/doc[4]/float[@name='id'][.='2.0']",
"//result/doc[5]/float[@name='id'][.='1.0']"
);
//Test range query embedded in larger query
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=6}");
params.add("q", "*:* OR test_ti:[0 TO 2000]");
params.add("rqq", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50 id:6^60");
params.add("fl", "id,score");
params.add("start", "0");
params.add("rows", "6");
assertQ(req(params), "*[count(//doc)=6]",
"//result/doc[1]/float[@name='id'][.='6.0']",
"//result/doc[2]/float[@name='id'][.='5.0']",
"//result/doc[3]/float[@name='id'][.='4.0']",
"//result/doc[4]/float[@name='id'][.='2.0']",
"//result/doc[5]/float[@name='id'][.='1.0']",
"//result/doc[6]/float[@name='id'][.='3.0']"
);
//Test with start beyond reRankDocs
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=3 reRankWeight=2}");
params.add("q", "*:*");
params.add("rqq", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50 id:6^60");
params.add("q", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50 id:6^60");
params.add("rqq", "id:1^1000");
params.add("fl", "id,score");
params.add("start", "4");
params.add("rows", "5");
@ -285,6 +416,110 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
"//result/doc[2]/float[@name='id'][.='1.0']"
);
//Test ReRankDocs > docs returned
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=6 reRankWeight=2}");
params.add("q", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50");
params.add("rqq", "id:1^1000");
params.add("fl", "id,score");
params.add("start", "0");
params.add("rows", "1");
assertQ(req(params), "*[count(//doc)=1]",
"//result/doc[1]/float[@name='id'][.='1.0']"
);
//Test with zero results
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=3 reRankWeight=2}");
params.add("q", "term_s:NNNN");
params.add("rqq", "id:1^1000");
params.add("fl", "id,score");
params.add("start", "4");
params.add("rows", "5");
assertQ(req(params), "*[count(//doc)=0]");
}
@Test
public void testOverRank() throws Exception {
assertU(delQ("*:*"));
assertU(commit());
//Test the scenario that where we rank more documents then we return.
String[] doc = {"id","1", "term_s", "YYYY", "group_s", "group1", "test_ti", "5", "test_tl", "10", "test_tf", "2000"};
assertU(adoc(doc));
String[] doc1 = {"id","2", "term_s","YYYY", "group_s", "group1", "test_ti", "50", "test_tl", "100", "test_tf", "200"};
assertU(adoc(doc1));
String[] doc2 = {"id","3", "term_s", "YYYY", "test_ti", "5000", "test_tl", "100", "test_tf", "200"};
assertU(adoc(doc2));
String[] doc3 = {"id","4", "term_s", "YYYY", "test_ti", "500", "test_tl", "1000", "test_tf", "2000"};
assertU(adoc(doc3));
String[] doc4 = {"id","5", "term_s", "YYYY", "group_s", "group2", "test_ti", "4", "test_tl", "10", "test_tf", "2000"};
assertU(adoc(doc4));
String[] doc5 = {"id","6", "term_s","YYYY", "group_s", "group2", "test_ti", "10", "test_tl", "100", "test_tf", "200"};
assertU(adoc(doc5));
String[] doc6 = {"id","7", "term_s", "YYYY", "group_s", "group1", "test_ti", "5", "test_tl", "10", "test_tf", "2000"};
assertU(adoc(doc6));
String[] doc7 = {"id","8", "term_s","YYYY", "group_s", "group1", "test_ti", "50", "test_tl", "100", "test_tf", "200"};
assertU(adoc(doc7));
String[] doc8 = {"id","9", "term_s", "YYYY", "test_ti", "5000", "test_tl", "100", "test_tf", "200"};
assertU(adoc(doc8));
String[] doc9 = {"id","10", "term_s", "YYYY", "test_ti", "500", "test_tl", "1000", "test_tf", "2000"};
assertU(adoc(doc9));
String[] doc10 = {"id","11", "term_s", "YYYY", "group_s", "group2", "test_ti", "4", "test_tl", "10", "test_tf", "2000"};
assertU(adoc(doc10));
assertU(commit());
ModifiableSolrParams params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=11 reRankWeight=2}");
params.add("q", "{!edismax bq=$bqq1}*:*");
params.add("bqq1", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50 id:6^60 id:7^70 id:8^80 id:9^90 id:10^100 id:11^110");
params.add("rqq", "test_ti:50^1000");
params.add("fl", "id,score");
params.add("start", "0");
params.add("rows", "2");
assertQ(req(params), "*[count(//doc)=2]",
"//result/doc[1]/float[@name='id'][.='8.0']",
"//result/doc[2]/float[@name='id'][.='2.0']"
);
//Test Elevation
params = new ModifiableSolrParams();
params.add("rq", "{!rerank reRankQuery=$rqq reRankDocs=6 reRankWeight=2}");
params.add("q", "{!edismax bq=$bqq1}*:*");
params.add("bqq1", "id:1^10 id:2^20 id:3^30 id:4^40 id:5^50 id:6^60 id:7^70 id:8^80 id:9^90 id:10^100 id:11^110");
params.add("rqq", "test_ti:50^1000");
params.add("fl", "id,score");
params.add("start", "0");
params.add("rows", "3");
params.add("qt","/elevate");
params.add("elevateIds", "1,4");
assertQ(req(params), "*[count(//doc)=3]",
"//result/doc[1]/float[@name='id'][.='1.0']", //Elevated
"//result/doc[2]/float[@name='id'][.='4.0']", //Elevated
"//result/doc[3]/float[@name='id'][.='8.0']"); //Boosted during rerank.
}
}