LUCENE-6708: TopFieldCollector does not compute the score several times on the same document anymore.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1694435 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-08-06 09:55:39 +00:00
parent dcc4540595
commit 850af3629b
3 changed files with 176 additions and 209 deletions

View File

@ -43,6 +43,13 @@ API Changes
* LUCENE-6706: PayloadTermQuery and PayloadNearQuery have been removed. * LUCENE-6706: PayloadTermQuery and PayloadNearQuery have been removed.
Instead, use PayloadScoreQuery to wrap any SpanQuery. (Alan Woodward) Instead, use PayloadScoreQuery to wrap any SpanQuery. (Alan Woodward)
======================= Lucene 5.4.0 =======================
Optimizations
* LUCENE-6708: TopFieldCollector does not compute the score several times on the
same document anymore. (Adrien Grand)
======================= Lucene 5.3.0 ======================= ======================= Lucene 5.3.0 =======================
New Features New Features

View File

@ -43,15 +43,20 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final LeafFieldComparator comparator; final LeafFieldComparator comparator;
final int reverseMul; final int reverseMul;
final boolean mayNeedScoresTwice;
Scorer scorer; Scorer scorer;
/*
 * Builds a leaf collector driven by a single comparator.
 *
 * comparator         - the per-segment comparator consulted for every hit
 * reverseMul         - multiplier stored alongside the comparator for use by collect()
 * mayNeedScoresTwice - when true, setScorer() wraps the incoming scorer in a
 *                      ScoreCachingWrappingScorer unless it already is one
 */
OneComparatorLeafCollector(LeafFieldComparator comparator, int reverseMul, boolean mayNeedScoresTwice) {
  this.mayNeedScoresTwice = mayNeedScoresTwice;
  this.reverseMul = reverseMul;
  this.comparator = comparator;
}
@Override @Override
public void setScorer(Scorer scorer) throws IOException { public void setScorer(Scorer scorer) throws IOException {
if (mayNeedScoresTwice && scorer instanceof ScoreCachingWrappingScorer == false) {
scorer = new ScoreCachingWrappingScorer(scorer);
}
this.scorer = scorer; this.scorer = scorer;
comparator.setScorer(scorer); comparator.setScorer(scorer);
} }
@ -63,13 +68,15 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final int[] reverseMul; final int[] reverseMul;
final LeafFieldComparator firstComparator; final LeafFieldComparator firstComparator;
final int firstReverseMul; final int firstReverseMul;
final boolean mayNeedScoresTwice;
Scorer scorer; Scorer scorer;
/*
 * Builds a leaf collector driven by several comparators.
 *
 * comparators        - per-segment comparators; element 0 is also kept as firstComparator
 * reverseMul         - one multiplier per comparator; element 0 is also kept as firstReverseMul
 * mayNeedScoresTwice - when true, setScorer() wraps the incoming scorer in a
 *                      ScoreCachingWrappingScorer unless it already is one
 */
MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul, boolean mayNeedScoresTwice) {
  this.mayNeedScoresTwice = mayNeedScoresTwice;
  // Cache the first comparator and its multiplier in dedicated fields.
  this.firstComparator = comparators[0];
  this.firstReverseMul = reverseMul[0];
  this.comparators = comparators;
  this.reverseMul = reverseMul;
}
protected final int compareBottom(int doc) throws IOException { protected final int compareBottom(int doc) throws IOException {
@ -115,210 +122,39 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
@Override @Override
public void setScorer(Scorer scorer) throws IOException { public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer; this.scorer = scorer;
if (mayNeedScoresTwice && scorer instanceof ScoreCachingWrappingScorer == false) {
scorer = new ScoreCachingWrappingScorer(scorer);
}
for (LeafFieldComparator comparator : comparators) { for (LeafFieldComparator comparator : comparators) {
comparator.setScorer(scorer); comparator.setScorer(scorer);
} }
} }
} }
/*
 * TopFieldCollector variant that ranks hits purely through the sort
 * comparators: collect() never calls scorer.score() itself, entries are queued
 * with a NaN score, and maxScore is not tracked.
 */
private static class NonScoringCollector extends TopFieldCollector {

  final FieldValueHitQueue<Entry> queue;

  public NonScoringCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields) {
    super(queue, numHits, fillFields, sort.needsScores());
    this.queue = queue;
  }

  @Override
  public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
    docBase = context.docBase;

    final LeafFieldComparator[] leafComparators = queue.getComparators(context);
    final int[] multipliers = queue.getReverseMul();

    if (leafComparators.length != 1) {
      // General case: several sort criteria.
      return new MultiComparatorLeafCollector(leafComparators, multipliers) {
        @Override
        public void collect(int doc) throws IOException {
          ++totalHits;

          if (!queueFull) {
            // Queue still has room: take the next free slot unconditionally.
            final int freeSlot = totalHits - 1;
            copy(freeSlot, doc);
            add(freeSlot, doc, Float.NaN);
            if (queueFull) {
              // That insertion filled the queue, so a bottom now exists.
              setBottom(bottom.slot);
            }
            return;
          }

          // Docs arrive in doc id order, so a tie (0) means this doc sorts
          // after everything already queued and is not competitive either.
          if (compareBottom(doc) > 0) {
            // Competitive hit: overwrite the bottom entry and re-establish the bottom.
            copy(bottom.slot, doc);
            updateBottom(doc);
            setBottom(bottom.slot);
          }
        }
      };
    }

    // Specialization for a single sort criterion.
    return new OneComparatorLeafCollector(leafComparators[0], multipliers[0]) {
      @Override
      public void collect(int doc) throws IOException {
        ++totalHits;

        if (!queueFull) {
          // Queue still has room: take the next free slot unconditionally.
          final int freeSlot = totalHits - 1;
          comparator.copy(freeSlot, doc);
          add(freeSlot, doc, Float.NaN);
          if (queueFull) {
            // That insertion filled the queue, so a bottom now exists.
            comparator.setBottom(bottom.slot);
          }
          return;
        }

        // Docs arrive in doc id order, so a tie (0) means this doc sorts
        // after everything already queued and is not competitive either.
        final int cmp = reverseMul * comparator.compareBottom(doc);
        if (cmp > 0) {
          // Competitive hit: overwrite the bottom entry and re-establish the bottom.
          comparator.copy(bottom.slot, doc);
          updateBottom(doc);
          comparator.setBottom(bottom.slot);
        }
      }
    };
  }

}
/*
 * TopFieldCollector variant that records the score of every hit placed in the
 * queue but does not track maxScore. The score is requested from the scorer
 * only for hits that are actually inserted.
 */
private static class ScoringNoMaxScoreCollector extends TopFieldCollector {

  final FieldValueHitQueue<Entry> queue;

  public ScoringNoMaxScoreCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields) {
    super(queue, numHits, fillFields, true);
    this.queue = queue;
  }

  @Override
  public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
    docBase = context.docBase;

    final LeafFieldComparator[] leafComparators = queue.getComparators(context);
    final int[] multipliers = queue.getReverseMul();

    if (leafComparators.length != 1) {
      // General case: several sort criteria.
      return new MultiComparatorLeafCollector(leafComparators, multipliers) {
        @Override
        public void collect(int doc) throws IOException {
          ++totalHits;

          if (!queueFull) {
            // Queue still has room: every hit is kept, so its score is needed.
            final float hitScore = scorer.score();
            final int freeSlot = totalHits - 1;
            copy(freeSlot, doc);
            add(freeSlot, doc, hitScore);
            if (queueFull) {
              // That insertion filled the queue, so a bottom now exists.
              setBottom(bottom.slot);
            }
            return;
          }

          // Docs arrive in doc id order, so a tie (0) means this doc is larger
          // than anything else in the queue and therefore not competitive.
          if (compareBottom(doc) > 0) {
            // Score is computed only now that the hit is known to be competitive.
            final float hitScore = scorer.score();
            copy(bottom.slot, doc);
            updateBottom(doc, hitScore);
            setBottom(bottom.slot);
          }
        }
      };
    }

    // Specialization for a single sort criterion.
    return new OneComparatorLeafCollector(leafComparators[0], multipliers[0]) {
      @Override
      public void collect(int doc) throws IOException {
        ++totalHits;

        if (!queueFull) {
          // Queue still has room: every hit is kept, so its score is needed.
          final float hitScore = scorer.score();
          final int freeSlot = totalHits - 1;
          comparator.copy(freeSlot, doc);
          add(freeSlot, doc, hitScore);
          if (queueFull) {
            // That insertion filled the queue, so a bottom now exists.
            comparator.setBottom(bottom.slot);
          }
          return;
        }

        // Docs arrive in doc id order, so a tie (0) means this doc is larger
        // than anything else in the queue and therefore not competitive.
        final int cmp = reverseMul * comparator.compareBottom(doc);
        if (cmp > 0) {
          // Score is computed only now that the hit is known to be competitive.
          final float hitScore = scorer.score();
          comparator.copy(bottom.slot, doc);
          updateBottom(doc, hitScore);
          comparator.setBottom(bottom.slot);
        }
      }
    };
  }

}
/* /*
* Implements a TopFieldCollector over one SortField criteria, with tracking * Implements a TopFieldCollector over one SortField criteria, with tracking
* document scores and maxScore. * document scores and maxScore.
*/ */
private static class ScoringMaxScoreCollector extends TopFieldCollector { private static class SimpleFieldCollector extends TopFieldCollector {
final FieldValueHitQueue<Entry> queue; final FieldValueHitQueue<Entry> queue;
final boolean trackDocScores;
final boolean trackMaxScore;
final boolean mayNeedScoresTwice;
public ScoringMaxScoreCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields) { public SimpleFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields,
super(queue, numHits, fillFields, true); boolean trackDocScores, boolean trackMaxScore) {
super(queue, numHits, fillFields, sort.needsScores() || trackDocScores || trackMaxScore);
this.queue = queue; this.queue = queue;
maxScore = Float.MIN_NORMAL; // otherwise we would keep NaN if (trackMaxScore) {
maxScore = Float.NEGATIVE_INFINITY; // otherwise we would keep NaN
}
this.trackDocScores = trackDocScores;
this.trackMaxScore = trackMaxScore;
// If one of the sort fields needs scores, and if we also track scores, then
// we might call scorer.score() several times per doc so wrapping the scorer
// to cache scores would help
this.mayNeedScoresTwice = sort.needsScores() && (trackDocScores || trackMaxScore);
} }
@Override @Override
@ -329,14 +165,18 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final int[] reverseMul = queue.getReverseMul(); final int[] reverseMul = queue.getReverseMul();
if (comparators.length == 1) { if (comparators.length == 1) {
return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) { return new OneComparatorLeafCollector(comparators[0], reverseMul[0], mayNeedScoresTwice) {
@Override @Override
public void collect(int doc) throws IOException { public void collect(int doc) throws IOException {
final float score = scorer.score(); float score = Float.NaN;
if (trackMaxScore) {
score = scorer.score();
if (score > maxScore) { if (score > maxScore) {
maxScore = score; maxScore = score;
} }
}
++totalHits; ++totalHits;
if (queueFull) { if (queueFull) {
if (reverseMul * comparator.compareBottom(doc) <= 0) { if (reverseMul * comparator.compareBottom(doc) <= 0) {
@ -346,6 +186,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
return; return;
} }
if (trackDocScores && !trackMaxScore) {
score = scorer.score();
}
// This hit is competitive - replace bottom element in queue & adjustTop // This hit is competitive - replace bottom element in queue & adjustTop
comparator.copy(bottom.slot, doc); comparator.copy(bottom.slot, doc);
updateBottom(doc, score); updateBottom(doc, score);
@ -353,6 +197,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
} else { } else {
// Startup transient: queue hasn't gathered numHits yet // Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1; final int slot = totalHits - 1;
if (trackDocScores && !trackMaxScore) {
score = scorer.score();
}
// Copy hit into queue // Copy hit into queue
comparator.copy(slot, doc); comparator.copy(slot, doc);
add(slot, doc, score); add(slot, doc, score);
@ -364,14 +213,18 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
}; };
} else { } else {
return new MultiComparatorLeafCollector(comparators, reverseMul) { return new MultiComparatorLeafCollector(comparators, reverseMul, mayNeedScoresTwice) {
@Override @Override
public void collect(int doc) throws IOException { public void collect(int doc) throws IOException {
final float score = scorer.score(); float score = Float.NaN;
if (trackMaxScore) {
score = scorer.score();
if (score > maxScore) { if (score > maxScore) {
maxScore = score; maxScore = score;
} }
}
++totalHits; ++totalHits;
if (queueFull) { if (queueFull) {
if (compareBottom(doc) <= 0) { if (compareBottom(doc) <= 0) {
@ -381,6 +234,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
return; return;
} }
if (trackDocScores && !trackMaxScore) {
score = scorer.score();
}
// This hit is competitive - replace bottom element in queue & adjustTop // This hit is competitive - replace bottom element in queue & adjustTop
copy(bottom.slot, doc); copy(bottom.slot, doc);
updateBottom(doc, score); updateBottom(doc, score);
@ -388,6 +245,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
} else { } else {
// Startup transient: queue hasn't gathered numHits yet // Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1; final int slot = totalHits - 1;
if (trackDocScores && !trackMaxScore) {
score = scorer.score();
}
// Copy hit into queue // Copy hit into queue
copy(slot, doc); copy(slot, doc);
add(slot, doc, score); add(slot, doc, score);
@ -413,6 +275,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final boolean trackDocScores; final boolean trackDocScores;
final boolean trackMaxScore; final boolean trackMaxScore;
final FieldDoc after; final FieldDoc after;
final boolean mayNeedScoresTwice;
public PagingFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, FieldDoc after, int numHits, boolean fillFields, public PagingFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, FieldDoc after, int numHits, boolean fillFields,
boolean trackDocScores, boolean trackMaxScore) { boolean trackDocScores, boolean trackMaxScore) {
@ -421,6 +284,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
this.trackDocScores = trackDocScores; this.trackDocScores = trackDocScores;
this.trackMaxScore = trackMaxScore; this.trackMaxScore = trackMaxScore;
this.after = after; this.after = after;
this.mayNeedScoresTwice = sort.needsScores() && (trackDocScores || trackMaxScore);
// Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN. // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN.
maxScore = Float.NEGATIVE_INFINITY; maxScore = Float.NEGATIVE_INFINITY;
@ -438,7 +302,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
docBase = context.docBase; docBase = context.docBase;
final int afterDoc = after.doc - docBase; final int afterDoc = after.doc - docBase;
return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul()) { return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul(), mayNeedScoresTwice) {
@Override @Override
public void collect(int doc) throws IOException { public void collect(int doc) throws IOException {
@ -628,13 +492,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits); FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
if (after == null) { if (after == null) {
if (trackMaxScore) { return new SimpleFieldCollector(sort, queue, numHits, fillFields, trackDocScores, trackMaxScore);
return new ScoringMaxScoreCollector(sort, queue, numHits, fillFields);
} else if (trackDocScores) {
return new ScoringNoMaxScoreCollector(sort, queue, numHits, fillFields);
} else {
return new NonScoringCollector(sort, queue, numHits, fillFields);
}
} else { } else {
if (after.fields == null) { if (after.fields == null) {
throw new IllegalArgumentException("after.fields wasn't set; you must pass fillFields=true for the previous search"); throw new IllegalArgumentException("after.fields wasn't set; you must pass fillFields=true for the previous search");

View File

@ -17,13 +17,21 @@ package org.apache.lucene.search;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.FieldValueHitQueue.Entry; import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestTopFieldCollector extends LuceneTestCase { public class TestTopFieldCollector extends LuceneTestCase {
private IndexSearcher is; private IndexSearcher is;
@ -167,4 +175,98 @@ public class TestTopFieldCollector extends LuceneTestCase {
assertTrue(Float.isNaN(td.getMaxScore())); assertTrue(Float.isNaN(td.getMaxScore()));
} }
} }
/*
 * Checks that TopFieldCollector never calls score() twice for the same
 * document. Each scorer handed to the collector is replaced by a delegate that
 * throws an AssertionError on a repeated score() call for the current doc. The
 * check runs for two score-based sorts and every combination of the
 * doc-score / max-score tracking flags.
 */
public void testComputeScoresOnlyOnce() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);

  // Three documents sharing the same field instances; only the "text" value changes.
  Document doc = new Document();
  StringField text = new StringField("text", "foo", Store.NO);
  doc.add(text);
  NumericDocValuesField relevance = new NumericDocValuesField("relevance", 1);
  doc.add(relevance);
  w.addDocument(doc);
  for (String value : new String[] {"bar", "baz"}) {
    text.setStringValue(value);
    w.addDocument(doc);
  }
  IndexReader reader = w.getReader();

  // Disjunction over the three terms, with different boosts on two of them.
  TermQuery foo = new TermQuery(new Term("text", "foo"));
  TermQuery bar = new TermQuery(new Term("text", "bar"));
  bar.setBoost(2);
  TermQuery baz = new TermQuery(new Term("text", "baz"));
  baz.setBoost(3);
  BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
  disjunction.add(foo, Occur.SHOULD);
  disjunction.add(bar, Occur.SHOULD);
  disjunction.add(baz, Occur.SHOULD);
  Query query = disjunction.build();

  final IndexSearcher searcher = new IndexSearcher(reader);

  final Sort[] sorts = {
      new Sort(SortField.FIELD_SCORE),
      new Sort(new SortField("f", SortField.Type.SCORE))
  };
  final boolean[] flags = {false, true};

  for (Sort sort : sorts) {
    for (boolean doDocScores : flags) {
      for (boolean doMaxScore : flags) {
        final int numHits = TestUtil.nextInt(random(), 1, 2);
        final TopFieldCollector topCollector = TopFieldCollector.create(sort, numHits, true, doDocScores, doMaxScore);

        final Collector assertingCollector = new Collector() {
          @Override
          public boolean needsScores() {
            return topCollector.needsScores();
          }

          @Override
          public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
            final LeafCollector in = topCollector.getLeafCollector(context);
            return new FilterLeafCollector(in) {
              @Override
              public void setScorer(final Scorer scorer) throws IOException {
                // Delegating scorer that remembers the last doc it scored.
                Scorer guarded = new Scorer(null) {
                  int lastComputedDoc = -1;

                  @Override
                  public float score() throws IOException {
                    final int current = docID();
                    if (current == lastComputedDoc) {
                      throw new AssertionError("Score computed twice on " + current);
                    }
                    lastComputedDoc = current;
                    return scorer.score();
                  }

                  @Override
                  public int docID() {
                    return scorer.docID();
                  }

                  @Override
                  public int freq() throws IOException {
                    return scorer.freq();
                  }

                  @Override
                  public int nextDoc() throws IOException {
                    return scorer.nextDoc();
                  }

                  @Override
                  public int advance(int target) throws IOException {
                    return scorer.advance(target);
                  }

                  @Override
                  public long cost() {
                    return scorer.cost();
                  }
                };
                super.setScorer(guarded);
              }
            };
          }
        };

        searcher.search(query, assertingCollector);
      }
    }
  }

  reader.close();
  w.close();
  dir.close();
}
} }