mirror of https://github.com/apache/lucene.git
Revert "LUCENE-8956: QueryRescorer now only sorts the first topN hits instead of all initial hits."
This reverts commit fd3ae87805.

parent 02792de0e5
commit e1c4742abf
lucene/CHANGES.txt
@@ -134,9 +134,6 @@ the total hits is not requested.
 * LUCENE-8755: spatial-extras quad and packed quad prefix trees now index points faster.
   (Chongchen Chen, David Smiley)
 
-* LUCENE-8956: QueryRescorer now only sorts the first topN hits instead of all
-  initial hits. (Paul Sanwald via Adrien Grand)
-
 Bug Fixes
 
 * LUCENE-8755: spatial-extras quad and packed quad prefix trees could throw a
lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java
@@ -23,7 +23,6 @@ import java.util.Comparator;
 import java.util.List;
 
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.util.ArrayUtil;
 
 /** A {@link Rescorer} that uses a provided Query to assign
  *  scores to the first-pass hits.
@@ -51,7 +50,6 @@ public abstract class QueryRescorer extends Rescorer {
   @Override
   public TopDocs rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN) throws IOException {
     ScoreDoc[] hits = firstPassTopDocs.scoreDocs.clone();
-
     Arrays.sort(hits,
                 new Comparator<ScoreDoc>() {
                   @Override
@@ -111,7 +109,11 @@ public abstract class QueryRescorer extends Rescorer {
       hitUpto++;
     }
 
-    Comparator<ScoreDoc> sortDocComparator = new Comparator<ScoreDoc>() {
+    // TODO: we should do a partial sort (of only topN)
+    // instead, but typically the number of hits is
+    // smallish:
+    Arrays.sort(hits,
+                new Comparator<ScoreDoc>() {
                   @Override
                   public int compare(ScoreDoc a, ScoreDoc b) {
                     // Sort by score descending, then docID ascending:
@@ -125,17 +127,14 @@ public abstract class QueryRescorer extends Rescorer {
                       return a.doc - b.doc;
                     }
                   }
-    };
+                });
 
     if (topN < hits.length) {
-      ArrayUtil.select(hits, 0, hits.length, topN, sortDocComparator);
       ScoreDoc[] subset = new ScoreDoc[topN];
       System.arraycopy(hits, 0, subset, 0, topN);
       hits = subset;
     }
 
-    Arrays.sort(hits, sortDocComparator);
-
     return new TopDocs(firstPassTopDocs.totalHits, hits);
   }
 
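For readers scanning the diff, the behavioral difference in QueryRescorer.rescore is easy to miss: the restored code sorts every first-pass hit by the combined score and then truncates to topN, whereas the reverted LUCENE-8956 change first partitioned the array with ArrayUtil.select so that only the leading topN entries needed a full sort. The following is a minimal, dependency-free Java sketch of the two strategies; the Hit class and method names are illustrative, and a bounded min-heap stands in here for Lucene's in-place introselect.

import java.util.Arrays;
import java.util.Comparator;
import java.util.PriorityQueue;

public class RescoreSortSketch {

  // Illustrative stand-in for Lucene's ScoreDoc: a doc id plus its rescored score.
  static final class Hit {
    final int doc;
    final float score;
    Hit(int doc, float score) { this.doc = doc; this.score = score; }
    @Override public String toString() { return doc + ":" + score; }
  }

  // Score descending, then docID ascending -- the tie-break QueryRescorer uses.
  static final Comparator<Hit> BY_SCORE_THEN_DOC =
      Comparator.comparingDouble((Hit h) -> h.score).reversed()
                .thenComparingInt(h -> h.doc);

  // Restored behavior: sort all first-pass hits, then keep the first topN.
  static Hit[] rescoreAll(Hit[] hits, int topN) {
    Hit[] copy = hits.clone();
    Arrays.sort(copy, BY_SCORE_THEN_DOC);
    return topN < copy.length ? Arrays.copyOf(copy, topN) : copy;
  }

  // Reverted LUCENE-8956 idea: only the best topN hits need full ordering.
  // Lucene used ArrayUtil.select (in-place introselect); a bounded min-heap
  // plays that role here so the sketch has no Lucene dependency.
  static Hit[] rescoreTopNOnly(Hit[] hits, int topN) {
    if (topN >= hits.length) {
      return rescoreAll(hits, topN);
    }
    PriorityQueue<Hit> worstOnTop = new PriorityQueue<>(BY_SCORE_THEN_DOC.reversed());
    for (Hit h : hits) {
      worstOnTop.offer(h);
      if (worstOnTop.size() > topN) {
        worstOnTop.poll();                      // evict the currently worst hit
      }
    }
    Hit[] top = worstOnTop.toArray(new Hit[0]);
    Arrays.sort(top, BY_SCORE_THEN_DOC);        // only topN elements get sorted
    return top;
  }

  public static void main(String[] args) {
    Hit[] hits = { new Hit(3, 1.2f), new Hit(7, 4.5f), new Hit(1, 4.5f),
                   new Hit(9, 0.3f), new Hit(5, 2.8f) };
    System.out.println(Arrays.toString(rescoreAll(hits, 3)));      // [1:4.5, 7:4.5, 5:2.8]
    System.out.println(Arrays.toString(rescoreTopNOnly(hits, 3))); // same three hits
  }
}

Either path returns the same topN hits in the same order; the only difference is how much of the array is fully sorted.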
lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
@@ -479,23 +479,12 @@ public final class ArrayUtil {
     timSort(a, 0, a.length);
   }
 
-  /**
-   * Reorganize {@code arr[from:to[} so that the element at offset k is at the
-   * same position as if {@code arr[from:to]} was sorted, and all elements on
+  /** Reorganize {@code arr[from:to[} so that the element at offset k is at the
+   *  same position as if {@code arr[from:to[} was sorted, and all elements on
    *  its left are less than or equal to it, and all elements on its right are
    *  greater than or equal to it.
-   *
    *  This runs in linear time on average and in {@code n log(n)} time in the
-   * worst case.
-   *
-   * @param arr Array to be re-organized.
-   * @param from Starting index for re-organization. Elements before this index
-   *             will be left as is.
-   * @param to Ending index. Elements after this index will be left as is.
-   * @param k Index of element to sort from. Value must be less than 'to' and greater than or equal to 'from'.
-   * @param comparator Comparator to use for sorting
-   *
-   */
+   *  worst case.*/
   public static <T> void select(T[] arr, int from, int to, int k, Comparator<? super T> comparator) {
     new IntroSelector() {
 
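The compact javadoc restored above states the contract of ArrayUtil.select: after the call, arr[k] holds the element a full sort of arr[from:to[ would place at offset k, everything to its left is less than or equal to it, and everything to its right is greater than or equal to it, in linear time on average. A small usage sketch, assuming lucene-core is on the classpath; the Integer array and the choice of k are illustrative.

import java.util.Arrays;
import java.util.Comparator;

import org.apache.lucene.util.ArrayUtil;

public class SelectDemo {
  public static void main(String[] args) {
    Integer[] values = { 9, 1, 7, 3, 5, 8, 2 };
    int k = 3;
    // Partition the whole array around offset k.
    ArrayUtil.select(values, 0, values.length, k, Comparator.naturalOrder());
    // values[k] is now 5, the element a full sort would put at index 3;
    // the left side holds {1, 2, 3} and the right side {7, 8, 9} in some order.
    System.out.println(values[k]);
    System.out.println(Arrays.toString(values));
  }
}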
lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java
@@ -20,9 +20,7 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Comparator;
-import java.util.List;
 
-import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericDocValuesField;
@@ -32,13 +30,11 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.similarities.BM25Similarity;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
@@ -58,93 +54,6 @@ public class TestQueryRescorer extends LuceneTestCase {
     return LuceneTestCase.newIndexWriterConfig().setSimilarity(new ClassicSimilarity());
   }
 
-  static List<String> dictionary = Arrays.asList("river","quick","brown","fox","jumped","lazy","fence");
-
-  String randomSentence() {
-    final int length = random().nextInt(10);
-    StringBuilder sentence = new StringBuilder(dictionary.get(0)+" ");
-    for (int i = 0; i < length; i++) {
-      sentence.append(dictionary.get(random().nextInt(dictionary.size()-1))+" ");
-    }
-    return sentence.toString();
-  }
-
-  private IndexReader publishDocs(int numDocs, String fieldName, Directory dir) throws Exception {
-
-    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
-    for (int i = 0; i < numDocs; i++) {
-      Document d = new Document();
-      d.add(newStringField("id", Integer.toString(i), Field.Store.YES));
-      d.add(newTextField(fieldName, randomSentence(), Field.Store.NO));
-      w.addDocument(d);
-    }
-    IndexReader reader = w.getReader();
-    w.close();
-    return reader;
-  }
-
-  public void testRescoreOfASubsetOfHits() throws Exception {
-    Directory dir = newDirectory();
-    int numDocs = 100;
-    String fieldName = "field";
-    IndexReader reader = publishDocs(numDocs, fieldName, dir);
-
-    // Construct a query that will get numDocs hits.
-    String wordOne = dictionary.get(0);
-    TermQuery termQuery = new TermQuery(new Term(fieldName, wordOne));
-    IndexSearcher searcher = getSearcher(reader);
-    searcher.setSimilarity(new BM25Similarity());
-    TopDocs hits = searcher.search(termQuery, numDocs);
-
-    // Next, use a more specific phrase query that will return different scores
-    // from the above term query
-    String wordTwo = RandomPicks.randomFrom(random(), dictionary);
-    PhraseQuery phraseQuery = new PhraseQuery(1, fieldName, wordOne, wordTwo);
-
-    // rescore, requesting a smaller topN
-    int topN = random().nextInt(numDocs-1);
-    TopDocs phraseQueryHits = QueryRescorer.rescore(searcher, hits, phraseQuery, 2.0, topN);
-    assertEquals(topN, phraseQueryHits.scoreDocs.length);
-
-    for (int i = 1; i < phraseQueryHits.scoreDocs.length; i++) {
-      assertTrue(phraseQueryHits.scoreDocs[i].score <= phraseQueryHits.scoreDocs[i-1].score);
-    }
-    reader.close();
-    dir.close();
-  }
-
-  public void testRescoreIsIdempotent() throws Exception {
-    Directory dir = newDirectory();
-    int numDocs = 100;
-    String fieldName = "field";
-    IndexReader reader = publishDocs(numDocs, fieldName, dir);
-
-    // Construct a query that will get numDocs hits.
-    String wordOne = dictionary.get(0);
-    TermQuery termQuery = new TermQuery(new Term(fieldName, wordOne));
-    IndexSearcher searcher = getSearcher(reader);
-    searcher.setSimilarity(new BM25Similarity());
-    TopDocs hits = searcher.search(termQuery, numDocs);
-
-    // Next, use a more specific phrase query that will return different scores
-    // from the above term query
-    String wordTwo = RandomPicks.randomFrom(random(), dictionary);
-    PhraseQuery phraseQuery = new PhraseQuery(1, fieldName, wordOne, wordTwo);
-
-    // rescore, requesting the same hits as topN
-    int topN = numDocs;
-    TopDocs firstRescoreHits = QueryRescorer.rescore(searcher, hits, phraseQuery, 2.0, topN);
-
-    // now rescore again, where topN is less than numDocs
-    topN = random().nextInt(numDocs-1);
-    ScoreDoc[] secondRescoreHits = QueryRescorer.rescore(searcher, hits, phraseQuery, 2.0, topN).scoreDocs;
-    ScoreDoc[] expectedTopNScoreDocs = ArrayUtil.copyOfSubArray(firstRescoreHits.scoreDocs, 0, topN);
-    CheckHits.checkEqual(phraseQuery, expectedTopNScoreDocs, secondRescoreHits);
-
-    reader.close();
-    dir.close();
-  }
-
   public void testBasic() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
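The two removed tests exercised the entry point that is unchanged by this revert, the static QueryRescorer.rescore(searcher, firstPassTopDocs, query, weight, topN), which rescores only the first-pass hits with a second query and combines the scores linearly using the given weight. A minimal sketch of that two-pass usage, assuming an existing index at a hypothetical path; the field name, terms, weight, and topN below are illustrative.

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.QueryRescorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class RescoreUsage {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/index"));   // hypothetical index path
         DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);

      // First pass: a cheap term query collects a generous candidate set.
      TopDocs firstPass = searcher.search(new TermQuery(new Term("field", "river")), 100);

      // Second pass: rescore only those candidates with a costlier phrase query,
      // weighting its contribution by 2.0 and keeping the best 10 hits.
      PhraseQuery phrase = new PhraseQuery(1, "field", "river", "quick");
      TopDocs rescored = QueryRescorer.rescore(searcher, firstPass, phrase, 2.0, 10);

      System.out.println("rescored hits: " + rescored.scoreDocs.length);
    }
  }
}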