mirror of https://github.com/apache/lucene.git
LUCENE-6708: TopFieldCollector does not compute the score several times on the same document anymore.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1694435 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dcc4540595
commit
850af3629b
|
@ -43,6 +43,13 @@ API Changes
|
|||
* LUCENE-6706: PayloadTermQuery and PayloadNearQuery have been removed.
|
||||
Instead, use PayloadScoreQuery to wrap any SpanQuery. (Alan Woodward)
|
||||
|
||||
======================= Lucene 5.4.0 =======================
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-6708: TopFieldCollector does not compute the score several times on the
|
||||
same document anymore. (Adrien Grand)
|
||||
|
||||
======================= Lucene 5.3.0 =======================
|
||||
|
||||
New Features
|
||||
|
|
|
@ -43,15 +43,20 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
|
||||
final LeafFieldComparator comparator;
|
||||
final int reverseMul;
|
||||
final boolean mayNeedScoresTwice;
|
||||
Scorer scorer;
|
||||
|
||||
OneComparatorLeafCollector(LeafFieldComparator comparator, int reverseMul) {
|
||||
OneComparatorLeafCollector(LeafFieldComparator comparator, int reverseMul, boolean mayNeedScoresTwice) {
|
||||
this.comparator = comparator;
|
||||
this.reverseMul = reverseMul;
|
||||
this.mayNeedScoresTwice = mayNeedScoresTwice;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
if (mayNeedScoresTwice && scorer instanceof ScoreCachingWrappingScorer == false) {
|
||||
scorer = new ScoreCachingWrappingScorer(scorer);
|
||||
}
|
||||
this.scorer = scorer;
|
||||
comparator.setScorer(scorer);
|
||||
}
|
||||
|
@ -63,13 +68,15 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
final int[] reverseMul;
|
||||
final LeafFieldComparator firstComparator;
|
||||
final int firstReverseMul;
|
||||
final boolean mayNeedScoresTwice;
|
||||
Scorer scorer;
|
||||
|
||||
MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) {
|
||||
MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul, boolean mayNeedScoresTwice) {
|
||||
this.comparators = comparators;
|
||||
this.reverseMul = reverseMul;
|
||||
firstComparator = comparators[0];
|
||||
firstReverseMul = reverseMul[0];
|
||||
this.mayNeedScoresTwice = mayNeedScoresTwice;
|
||||
}
|
||||
|
||||
protected final int compareBottom(int doc) throws IOException {
|
||||
|
@ -115,210 +122,39 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
this.scorer = scorer;
|
||||
if (mayNeedScoresTwice && scorer instanceof ScoreCachingWrappingScorer == false) {
|
||||
scorer = new ScoreCachingWrappingScorer(scorer);
|
||||
}
|
||||
for (LeafFieldComparator comparator : comparators) {
|
||||
comparator.setScorer(scorer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Implements a TopFieldCollector over one SortField criteria, without
|
||||
* tracking document scores and maxScore.
|
||||
*/
|
||||
private static class NonScoringCollector extends TopFieldCollector {
|
||||
|
||||
final FieldValueHitQueue<Entry> queue;
|
||||
|
||||
public NonScoringCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields) {
|
||||
super(queue, numHits, fillFields, sort.needsScores());
|
||||
this.queue = queue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
docBase = context.docBase;
|
||||
|
||||
final LeafFieldComparator[] comparators = queue.getComparators(context);
|
||||
final int[] reverseMul = queue.getReverseMul();
|
||||
|
||||
if (comparators.length == 1) {
|
||||
return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) {
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
++totalHits;
|
||||
if (queueFull) {
|
||||
if ((reverseMul * comparator.compareBottom(doc)) <= 0) {
|
||||
// since docs are visited in doc Id order, if compare is 0, it means
|
||||
// this document is larger than anything else in the queue, and
|
||||
// therefore not competitive.
|
||||
return;
|
||||
}
|
||||
|
||||
// This hit is competitive - replace bottom element in queue & adjustTop
|
||||
comparator.copy(bottom.slot, doc);
|
||||
updateBottom(doc);
|
||||
comparator.setBottom(bottom.slot);
|
||||
} else {
|
||||
// Startup transient: queue hasn't gathered numHits yet
|
||||
final int slot = totalHits - 1;
|
||||
// Copy hit into queue
|
||||
comparator.copy(slot, doc);
|
||||
add(slot, doc, Float.NaN);
|
||||
if (queueFull) {
|
||||
comparator.setBottom(bottom.slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
} else {
|
||||
return new MultiComparatorLeafCollector(comparators, reverseMul) {
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
++totalHits;
|
||||
if (queueFull) {
|
||||
if ((compareBottom(doc)) <= 0) {
|
||||
// since docs are visited in doc Id order, if compare is 0, it means
|
||||
// this document is larger than anything else in the queue, and
|
||||
// therefore not competitive.
|
||||
return;
|
||||
}
|
||||
|
||||
// This hit is competitive - replace bottom element in queue & adjustTop
|
||||
copy(bottom.slot, doc);
|
||||
updateBottom(doc);
|
||||
setBottom(bottom.slot);
|
||||
} else {
|
||||
// Startup transient: queue hasn't gathered numHits yet
|
||||
final int slot = totalHits - 1;
|
||||
// Copy hit into queue
|
||||
copy(slot, doc);
|
||||
add(slot, doc, Float.NaN);
|
||||
if (queueFull) {
|
||||
setBottom(bottom.slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Implements a TopFieldCollector over one SortField criteria, while tracking
|
||||
* document scores but no maxScore.
|
||||
*/
|
||||
private static class ScoringNoMaxScoreCollector extends TopFieldCollector {
|
||||
|
||||
final FieldValueHitQueue<Entry> queue;
|
||||
|
||||
public ScoringNoMaxScoreCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields) {
|
||||
super(queue, numHits, fillFields, true);
|
||||
this.queue = queue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
docBase = context.docBase;
|
||||
|
||||
final LeafFieldComparator[] comparators = queue.getComparators(context);
|
||||
final int[] reverseMul = queue.getReverseMul();
|
||||
|
||||
if (comparators.length == 1) {
|
||||
return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) {
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
++totalHits;
|
||||
if (queueFull) {
|
||||
if ((reverseMul * comparator.compareBottom(doc)) <= 0) {
|
||||
// since docs are visited in doc Id order, if compare is 0, it means
|
||||
// this document is largest than anything else in the queue, and
|
||||
// therefore not competitive.
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute the score only if the hit is competitive.
|
||||
final float score = scorer.score();
|
||||
|
||||
// This hit is competitive - replace bottom element in queue & adjustTop
|
||||
comparator.copy(bottom.slot, doc);
|
||||
updateBottom(doc, score);
|
||||
comparator.setBottom(bottom.slot);
|
||||
} else {
|
||||
// Compute the score only if the hit is competitive.
|
||||
final float score = scorer.score();
|
||||
|
||||
// Startup transient: queue hasn't gathered numHits yet
|
||||
final int slot = totalHits - 1;
|
||||
// Copy hit into queue
|
||||
comparator.copy(slot, doc);
|
||||
add(slot, doc, score);
|
||||
if (queueFull) {
|
||||
comparator.setBottom(bottom.slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
} else {
|
||||
return new MultiComparatorLeafCollector(comparators, reverseMul) {
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
++totalHits;
|
||||
if (queueFull) {
|
||||
if ((compareBottom(doc)) <= 0) {
|
||||
// since docs are visited in doc Id order, if compare is 0, it means
|
||||
// this document is largest than anything else in the queue, and
|
||||
// therefore not competitive.
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute the score only if the hit is competitive.
|
||||
final float score = scorer.score();
|
||||
|
||||
// This hit is competitive - replace bottom element in queue & adjustTop
|
||||
copy(bottom.slot, doc);
|
||||
updateBottom(doc, score);
|
||||
setBottom(bottom.slot);
|
||||
} else {
|
||||
// Compute the score only if the hit is competitive.
|
||||
final float score = scorer.score();
|
||||
|
||||
// Startup transient: queue hasn't gathered numHits yet
|
||||
final int slot = totalHits - 1;
|
||||
// Copy hit into queue
|
||||
copy(slot, doc);
|
||||
add(slot, doc, score);
|
||||
if (queueFull) {
|
||||
setBottom(bottom.slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Implements a TopFieldCollector over one SortField criteria, with tracking
|
||||
* document scores and maxScore.
|
||||
*/
|
||||
private static class ScoringMaxScoreCollector extends TopFieldCollector {
|
||||
private static class SimpleFieldCollector extends TopFieldCollector {
|
||||
|
||||
final FieldValueHitQueue<Entry> queue;
|
||||
final boolean trackDocScores;
|
||||
final boolean trackMaxScore;
|
||||
final boolean mayNeedScoresTwice;
|
||||
|
||||
public ScoringMaxScoreCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields) {
|
||||
super(queue, numHits, fillFields, true);
|
||||
public SimpleFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields,
|
||||
boolean trackDocScores, boolean trackMaxScore) {
|
||||
super(queue, numHits, fillFields, sort.needsScores() || trackDocScores || trackMaxScore);
|
||||
this.queue = queue;
|
||||
maxScore = Float.MIN_NORMAL; // otherwise we would keep NaN
|
||||
if (trackMaxScore) {
|
||||
maxScore = Float.NEGATIVE_INFINITY; // otherwise we would keep NaN
|
||||
}
|
||||
this.trackDocScores = trackDocScores;
|
||||
this.trackMaxScore = trackMaxScore;
|
||||
// If one of the sort fields needs scores, and if we also track scores, then
|
||||
// we might call scorer.score() several times per doc so wrapping the scorer
|
||||
// to cache scores would help
|
||||
this.mayNeedScoresTwice = sort.needsScores() && (trackDocScores || trackMaxScore);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -329,14 +165,18 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
final int[] reverseMul = queue.getReverseMul();
|
||||
|
||||
if (comparators.length == 1) {
|
||||
return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) {
|
||||
return new OneComparatorLeafCollector(comparators[0], reverseMul[0], mayNeedScoresTwice) {
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
final float score = scorer.score();
|
||||
if (score > maxScore) {
|
||||
maxScore = score;
|
||||
float score = Float.NaN;
|
||||
if (trackMaxScore) {
|
||||
score = scorer.score();
|
||||
if (score > maxScore) {
|
||||
maxScore = score;
|
||||
}
|
||||
}
|
||||
|
||||
++totalHits;
|
||||
if (queueFull) {
|
||||
if (reverseMul * comparator.compareBottom(doc) <= 0) {
|
||||
|
@ -346,6 +186,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
return;
|
||||
}
|
||||
|
||||
if (trackDocScores && !trackMaxScore) {
|
||||
score = scorer.score();
|
||||
}
|
||||
|
||||
// This hit is competitive - replace bottom element in queue & adjustTop
|
||||
comparator.copy(bottom.slot, doc);
|
||||
updateBottom(doc, score);
|
||||
|
@ -353,6 +197,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
} else {
|
||||
// Startup transient: queue hasn't gathered numHits yet
|
||||
final int slot = totalHits - 1;
|
||||
|
||||
if (trackDocScores && !trackMaxScore) {
|
||||
score = scorer.score();
|
||||
}
|
||||
|
||||
// Copy hit into queue
|
||||
comparator.copy(slot, doc);
|
||||
add(slot, doc, score);
|
||||
|
@ -364,14 +213,18 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
|
||||
};
|
||||
} else {
|
||||
return new MultiComparatorLeafCollector(comparators, reverseMul) {
|
||||
return new MultiComparatorLeafCollector(comparators, reverseMul, mayNeedScoresTwice) {
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
final float score = scorer.score();
|
||||
if (score > maxScore) {
|
||||
maxScore = score;
|
||||
float score = Float.NaN;
|
||||
if (trackMaxScore) {
|
||||
score = scorer.score();
|
||||
if (score > maxScore) {
|
||||
maxScore = score;
|
||||
}
|
||||
}
|
||||
|
||||
++totalHits;
|
||||
if (queueFull) {
|
||||
if (compareBottom(doc) <= 0) {
|
||||
|
@ -381,6 +234,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
return;
|
||||
}
|
||||
|
||||
if (trackDocScores && !trackMaxScore) {
|
||||
score = scorer.score();
|
||||
}
|
||||
|
||||
// This hit is competitive - replace bottom element in queue & adjustTop
|
||||
copy(bottom.slot, doc);
|
||||
updateBottom(doc, score);
|
||||
|
@ -388,6 +245,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
} else {
|
||||
// Startup transient: queue hasn't gathered numHits yet
|
||||
final int slot = totalHits - 1;
|
||||
|
||||
if (trackDocScores && !trackMaxScore) {
|
||||
score = scorer.score();
|
||||
}
|
||||
|
||||
// Copy hit into queue
|
||||
copy(slot, doc);
|
||||
add(slot, doc, score);
|
||||
|
@ -413,6 +275,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
final boolean trackDocScores;
|
||||
final boolean trackMaxScore;
|
||||
final FieldDoc after;
|
||||
final boolean mayNeedScoresTwice;
|
||||
|
||||
public PagingFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, FieldDoc after, int numHits, boolean fillFields,
|
||||
boolean trackDocScores, boolean trackMaxScore) {
|
||||
|
@ -421,6 +284,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
this.trackDocScores = trackDocScores;
|
||||
this.trackMaxScore = trackMaxScore;
|
||||
this.after = after;
|
||||
this.mayNeedScoresTwice = sort.needsScores() && (trackDocScores || trackMaxScore);
|
||||
|
||||
// Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN.
|
||||
maxScore = Float.NEGATIVE_INFINITY;
|
||||
|
@ -438,7 +302,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
docBase = context.docBase;
|
||||
final int afterDoc = after.doc - docBase;
|
||||
return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul()) {
|
||||
return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul(), mayNeedScoresTwice) {
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
|
@ -628,13 +492,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
|
||||
|
||||
if (after == null) {
|
||||
if (trackMaxScore) {
|
||||
return new ScoringMaxScoreCollector(sort, queue, numHits, fillFields);
|
||||
} else if (trackDocScores) {
|
||||
return new ScoringNoMaxScoreCollector(sort, queue, numHits, fillFields);
|
||||
} else {
|
||||
return new NonScoringCollector(sort, queue, numHits, fillFields);
|
||||
}
|
||||
return new SimpleFieldCollector(sort, queue, numHits, fillFields, trackDocScores, trackMaxScore);
|
||||
} else {
|
||||
if (after.fields == null) {
|
||||
throw new IllegalArgumentException("after.fields wasn't set; you must pass fillFields=true for the previous search");
|
||||
|
|
|
@ -17,13 +17,21 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.FieldValueHitQueue.Entry;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestTopFieldCollector extends LuceneTestCase {
|
||||
private IndexSearcher is;
|
||||
|
@ -166,5 +174,99 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
assertEquals(0, td.totalHits);
|
||||
assertTrue(Float.isNaN(td.getMaxScore()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testComputeScoresOnlyOnce() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
StringField text = new StringField("text", "foo", Store.NO);
|
||||
doc.add(text);
|
||||
NumericDocValuesField relevance = new NumericDocValuesField("relevance", 1);
|
||||
doc.add(relevance);
|
||||
w.addDocument(doc);
|
||||
text.setStringValue("bar");
|
||||
w.addDocument(doc);
|
||||
text.setStringValue("baz");
|
||||
w.addDocument(doc);
|
||||
IndexReader reader = w.getReader();
|
||||
TermQuery foo = new TermQuery(new Term("text", "foo"));
|
||||
TermQuery bar = new TermQuery(new Term("text", "bar"));
|
||||
bar.setBoost(2);
|
||||
TermQuery baz = new TermQuery(new Term("text", "baz"));
|
||||
baz.setBoost(3);
|
||||
Query query = new BooleanQuery.Builder()
|
||||
.add(foo, Occur.SHOULD)
|
||||
.add(bar, Occur.SHOULD)
|
||||
.add(baz, Occur.SHOULD)
|
||||
.build();
|
||||
final IndexSearcher searcher = new IndexSearcher(reader);
|
||||
for (Sort sort : new Sort[] {new Sort(SortField.FIELD_SCORE), new Sort(new SortField("f", SortField.Type.SCORE))}) {
|
||||
for (boolean doDocScores : new boolean[] {false, true}) {
|
||||
for (boolean doMaxScore : new boolean[] {false, true}) {
|
||||
final TopFieldCollector topCollector = TopFieldCollector.create(sort, TestUtil.nextInt(random(), 1, 2), true, doDocScores, doMaxScore);
|
||||
final Collector assertingCollector = new Collector() {
|
||||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
final LeafCollector in = topCollector.getLeafCollector(context);
|
||||
return new FilterLeafCollector(in) {
|
||||
@Override
|
||||
public void setScorer(final Scorer scorer) throws IOException {
|
||||
Scorer s = new Scorer(null) {
|
||||
|
||||
int lastComputedDoc = -1;
|
||||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
if (lastComputedDoc == docID()) {
|
||||
throw new AssertionError("Score computed twice on " + docID());
|
||||
}
|
||||
lastComputedDoc = docID();
|
||||
return scorer.score();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
return scorer.freq();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return scorer.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return scorer.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return scorer.advance(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return scorer.cost();
|
||||
}
|
||||
|
||||
};
|
||||
super.setScorer(s);
|
||||
}
|
||||
};
|
||||
}
|
||||
@Override
|
||||
public boolean needsScores() {
|
||||
return topCollector.needsScores();
|
||||
}
|
||||
};
|
||||
searcher.search(query, assertingCollector);
|
||||
}
|
||||
}
|
||||
}
|
||||
reader.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue