From 850af3629b96525bc2c8d52955deaba885013093 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 6 Aug 2015 09:55:39 +0000 Subject: [PATCH] LUCENE-6708: TopFieldCollector does not compute the score several times on the same document anymore. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1694435 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 7 + .../lucene/search/TopFieldCollector.java | 274 +++++------------- .../lucene/search/TestTopFieldCollector.java | 104 ++++++- 3 files changed, 176 insertions(+), 209 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 7bb5e05c087..c2d7d600192 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -43,6 +43,13 @@ API Changes * LUCENE-6706: PayloadTermQuery and PayloadNearQuery have been removed. Instead, use PayloadScoreQuery to wrap any SpanQuery. (Alan Woodward) +======================= Lucene 5.4.0 ======================= + +Optimizations + +* LUCENE-6708: TopFieldCollector does not compute the score several times on the + same document anymore. (Adrien Grand) + ======================= Lucene 5.3.0 ======================= New Features diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java index 3bf860a12c6..4139346f3d3 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java @@ -43,15 +43,20 @@ public abstract class TopFieldCollector extends TopDocsCollector { final LeafFieldComparator comparator; final int reverseMul; + final boolean mayNeedScoresTwice; Scorer scorer; - OneComparatorLeafCollector(LeafFieldComparator comparator, int reverseMul) { + OneComparatorLeafCollector(LeafFieldComparator comparator, int reverseMul, boolean mayNeedScoresTwice) { this.comparator = comparator; this.reverseMul = reverseMul; + this.mayNeedScoresTwice = mayNeedScoresTwice; } @Override public void setScorer(Scorer scorer) throws IOException { + if (mayNeedScoresTwice && scorer instanceof ScoreCachingWrappingScorer == false) { + scorer = new ScoreCachingWrappingScorer(scorer); + } this.scorer = scorer; comparator.setScorer(scorer); } @@ -63,13 +68,15 @@ public abstract class TopFieldCollector extends TopDocsCollector { final int[] reverseMul; final LeafFieldComparator firstComparator; final int firstReverseMul; + final boolean mayNeedScoresTwice; Scorer scorer; - MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) { + MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul, boolean mayNeedScoresTwice) { this.comparators = comparators; this.reverseMul = reverseMul; firstComparator = comparators[0]; firstReverseMul = reverseMul[0]; + this.mayNeedScoresTwice = mayNeedScoresTwice; } protected final int compareBottom(int doc) throws IOException { @@ -115,210 +122,39 @@ public abstract class TopFieldCollector extends TopDocsCollector { @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; + if (mayNeedScoresTwice && scorer instanceof ScoreCachingWrappingScorer == false) { + scorer = new ScoreCachingWrappingScorer(scorer); + } for (LeafFieldComparator comparator : comparators) { comparator.setScorer(scorer); } } } - /* - * Implements a TopFieldCollector over one SortField criteria, without - * tracking document scores and maxScore. - */ - private static class NonScoringCollector extends TopFieldCollector { - - final FieldValueHitQueue queue; - - public NonScoringCollector(Sort sort, FieldValueHitQueue queue, int numHits, boolean fillFields) { - super(queue, numHits, fillFields, sort.needsScores()); - this.queue = queue; - } - - @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - docBase = context.docBase; - - final LeafFieldComparator[] comparators = queue.getComparators(context); - final int[] reverseMul = queue.getReverseMul(); - - if (comparators.length == 1) { - return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) { - - @Override - public void collect(int doc) throws IOException { - ++totalHits; - if (queueFull) { - if ((reverseMul * comparator.compareBottom(doc)) <= 0) { - // since docs are visited in doc Id order, if compare is 0, it means - // this document is larger than anything else in the queue, and - // therefore not competitive. - return; - } - - // This hit is competitive - replace bottom element in queue & adjustTop - comparator.copy(bottom.slot, doc); - updateBottom(doc); - comparator.setBottom(bottom.slot); - } else { - // Startup transient: queue hasn't gathered numHits yet - final int slot = totalHits - 1; - // Copy hit into queue - comparator.copy(slot, doc); - add(slot, doc, Float.NaN); - if (queueFull) { - comparator.setBottom(bottom.slot); - } - } - } - - }; - } else { - return new MultiComparatorLeafCollector(comparators, reverseMul) { - - @Override - public void collect(int doc) throws IOException { - ++totalHits; - if (queueFull) { - if ((compareBottom(doc)) <= 0) { - // since docs are visited in doc Id order, if compare is 0, it means - // this document is larger than anything else in the queue, and - // therefore not competitive. - return; - } - - // This hit is competitive - replace bottom element in queue & adjustTop - copy(bottom.slot, doc); - updateBottom(doc); - setBottom(bottom.slot); - } else { - // Startup transient: queue hasn't gathered numHits yet - final int slot = totalHits - 1; - // Copy hit into queue - copy(slot, doc); - add(slot, doc, Float.NaN); - if (queueFull) { - setBottom(bottom.slot); - } - } - } - - }; - } - } - - } - - /* - * Implements a TopFieldCollector over one SortField criteria, while tracking - * document scores but no maxScore. - */ - private static class ScoringNoMaxScoreCollector extends TopFieldCollector { - - final FieldValueHitQueue queue; - - public ScoringNoMaxScoreCollector(Sort sort, FieldValueHitQueue queue, int numHits, boolean fillFields) { - super(queue, numHits, fillFields, true); - this.queue = queue; - } - - @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - docBase = context.docBase; - - final LeafFieldComparator[] comparators = queue.getComparators(context); - final int[] reverseMul = queue.getReverseMul(); - - if (comparators.length == 1) { - return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) { - - @Override - public void collect(int doc) throws IOException { - ++totalHits; - if (queueFull) { - if ((reverseMul * comparator.compareBottom(doc)) <= 0) { - // since docs are visited in doc Id order, if compare is 0, it means - // this document is largest than anything else in the queue, and - // therefore not competitive. - return; - } - - // Compute the score only if the hit is competitive. - final float score = scorer.score(); - - // This hit is competitive - replace bottom element in queue & adjustTop - comparator.copy(bottom.slot, doc); - updateBottom(doc, score); - comparator.setBottom(bottom.slot); - } else { - // Compute the score only if the hit is competitive. - final float score = scorer.score(); - - // Startup transient: queue hasn't gathered numHits yet - final int slot = totalHits - 1; - // Copy hit into queue - comparator.copy(slot, doc); - add(slot, doc, score); - if (queueFull) { - comparator.setBottom(bottom.slot); - } - } - } - - }; - } else { - return new MultiComparatorLeafCollector(comparators, reverseMul) { - - @Override - public void collect(int doc) throws IOException { - ++totalHits; - if (queueFull) { - if ((compareBottom(doc)) <= 0) { - // since docs are visited in doc Id order, if compare is 0, it means - // this document is largest than anything else in the queue, and - // therefore not competitive. - return; - } - - // Compute the score only if the hit is competitive. - final float score = scorer.score(); - - // This hit is competitive - replace bottom element in queue & adjustTop - copy(bottom.slot, doc); - updateBottom(doc, score); - setBottom(bottom.slot); - } else { - // Compute the score only if the hit is competitive. - final float score = scorer.score(); - - // Startup transient: queue hasn't gathered numHits yet - final int slot = totalHits - 1; - // Copy hit into queue - copy(slot, doc); - add(slot, doc, score); - if (queueFull) { - setBottom(bottom.slot); - } - } - } - - }; - } - } - - } - /* * Implements a TopFieldCollector over one SortField criteria, with tracking * document scores and maxScore. */ - private static class ScoringMaxScoreCollector extends TopFieldCollector { + private static class SimpleFieldCollector extends TopFieldCollector { final FieldValueHitQueue queue; + final boolean trackDocScores; + final boolean trackMaxScore; + final boolean mayNeedScoresTwice; - public ScoringMaxScoreCollector(Sort sort, FieldValueHitQueue queue, int numHits, boolean fillFields) { - super(queue, numHits, fillFields, true); + public SimpleFieldCollector(Sort sort, FieldValueHitQueue queue, int numHits, boolean fillFields, + boolean trackDocScores, boolean trackMaxScore) { + super(queue, numHits, fillFields, sort.needsScores() || trackDocScores || trackMaxScore); this.queue = queue; - maxScore = Float.MIN_NORMAL; // otherwise we would keep NaN + if (trackMaxScore) { + maxScore = Float.NEGATIVE_INFINITY; // otherwise we would keep NaN + } + this.trackDocScores = trackDocScores; + this.trackMaxScore = trackMaxScore; + // If one of the sort fields needs scores, and if we also track scores, then + // we might call scorer.score() several times per doc so wrapping the scorer + // to cache scores would help + this.mayNeedScoresTwice = sort.needsScores() && (trackDocScores || trackMaxScore); } @Override @@ -329,14 +165,18 @@ public abstract class TopFieldCollector extends TopDocsCollector { final int[] reverseMul = queue.getReverseMul(); if (comparators.length == 1) { - return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) { + return new OneComparatorLeafCollector(comparators[0], reverseMul[0], mayNeedScoresTwice) { @Override public void collect(int doc) throws IOException { - final float score = scorer.score(); - if (score > maxScore) { - maxScore = score; + float score = Float.NaN; + if (trackMaxScore) { + score = scorer.score(); + if (score > maxScore) { + maxScore = score; + } } + ++totalHits; if (queueFull) { if (reverseMul * comparator.compareBottom(doc) <= 0) { @@ -346,6 +186,10 @@ public abstract class TopFieldCollector extends TopDocsCollector { return; } + if (trackDocScores && !trackMaxScore) { + score = scorer.score(); + } + // This hit is competitive - replace bottom element in queue & adjustTop comparator.copy(bottom.slot, doc); updateBottom(doc, score); @@ -353,6 +197,11 @@ public abstract class TopFieldCollector extends TopDocsCollector { } else { // Startup transient: queue hasn't gathered numHits yet final int slot = totalHits - 1; + + if (trackDocScores && !trackMaxScore) { + score = scorer.score(); + } + // Copy hit into queue comparator.copy(slot, doc); add(slot, doc, score); @@ -364,14 +213,18 @@ public abstract class TopFieldCollector extends TopDocsCollector { }; } else { - return new MultiComparatorLeafCollector(comparators, reverseMul) { + return new MultiComparatorLeafCollector(comparators, reverseMul, mayNeedScoresTwice) { @Override public void collect(int doc) throws IOException { - final float score = scorer.score(); - if (score > maxScore) { - maxScore = score; + float score = Float.NaN; + if (trackMaxScore) { + score = scorer.score(); + if (score > maxScore) { + maxScore = score; + } } + ++totalHits; if (queueFull) { if (compareBottom(doc) <= 0) { @@ -381,6 +234,10 @@ public abstract class TopFieldCollector extends TopDocsCollector { return; } + if (trackDocScores && !trackMaxScore) { + score = scorer.score(); + } + // This hit is competitive - replace bottom element in queue & adjustTop copy(bottom.slot, doc); updateBottom(doc, score); @@ -388,6 +245,11 @@ public abstract class TopFieldCollector extends TopDocsCollector { } else { // Startup transient: queue hasn't gathered numHits yet final int slot = totalHits - 1; + + if (trackDocScores && !trackMaxScore) { + score = scorer.score(); + } + // Copy hit into queue copy(slot, doc); add(slot, doc, score); @@ -413,6 +275,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { final boolean trackDocScores; final boolean trackMaxScore; final FieldDoc after; + final boolean mayNeedScoresTwice; public PagingFieldCollector(Sort sort, FieldValueHitQueue queue, FieldDoc after, int numHits, boolean fillFields, boolean trackDocScores, boolean trackMaxScore) { @@ -421,6 +284,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { this.trackDocScores = trackDocScores; this.trackMaxScore = trackMaxScore; this.after = after; + this.mayNeedScoresTwice = sort.needsScores() && (trackDocScores || trackMaxScore); // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN. maxScore = Float.NEGATIVE_INFINITY; @@ -438,7 +302,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { docBase = context.docBase; final int afterDoc = after.doc - docBase; - return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul()) { + return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul(), mayNeedScoresTwice) { @Override public void collect(int doc) throws IOException { @@ -628,13 +492,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { FieldValueHitQueue queue = FieldValueHitQueue.create(sort.fields, numHits); if (after == null) { - if (trackMaxScore) { - return new ScoringMaxScoreCollector(sort, queue, numHits, fillFields); - } else if (trackDocScores) { - return new ScoringNoMaxScoreCollector(sort, queue, numHits, fillFields); - } else { - return new NonScoringCollector(sort, queue, numHits, fillFields); - } + return new SimpleFieldCollector(sort, queue, numHits, fillFields, trackDocScores, trackMaxScore); } else { if (after.fields == null) { throw new IllegalArgumentException("after.fields wasn't set; you must pass fillFields=true for the previous search"); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java index 473f6c65d25..a2c0ca3bab0 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java @@ -17,13 +17,21 @@ package org.apache.lucene.search; * limitations under the License. */ +import java.io.IOException; + import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.FieldValueHitQueue.Entry; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; public class TestTopFieldCollector extends LuceneTestCase { private IndexSearcher is; @@ -166,5 +174,99 @@ public class TestTopFieldCollector extends LuceneTestCase { assertEquals(0, td.totalHits); assertTrue(Float.isNaN(td.getMaxScore())); } - } + } + + public void testComputeScoresOnlyOnce() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + StringField text = new StringField("text", "foo", Store.NO); + doc.add(text); + NumericDocValuesField relevance = new NumericDocValuesField("relevance", 1); + doc.add(relevance); + w.addDocument(doc); + text.setStringValue("bar"); + w.addDocument(doc); + text.setStringValue("baz"); + w.addDocument(doc); + IndexReader reader = w.getReader(); + TermQuery foo = new TermQuery(new Term("text", "foo")); + TermQuery bar = new TermQuery(new Term("text", "bar")); + bar.setBoost(2); + TermQuery baz = new TermQuery(new Term("text", "baz")); + baz.setBoost(3); + Query query = new BooleanQuery.Builder() + .add(foo, Occur.SHOULD) + .add(bar, Occur.SHOULD) + .add(baz, Occur.SHOULD) + .build(); + final IndexSearcher searcher = new IndexSearcher(reader); + for (Sort sort : new Sort[] {new Sort(SortField.FIELD_SCORE), new Sort(new SortField("f", SortField.Type.SCORE))}) { + for (boolean doDocScores : new boolean[] {false, true}) { + for (boolean doMaxScore : new boolean[] {false, true}) { + final TopFieldCollector topCollector = TopFieldCollector.create(sort, TestUtil.nextInt(random(), 1, 2), true, doDocScores, doMaxScore); + final Collector assertingCollector = new Collector() { + @Override + public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { + final LeafCollector in = topCollector.getLeafCollector(context); + return new FilterLeafCollector(in) { + @Override + public void setScorer(final Scorer scorer) throws IOException { + Scorer s = new Scorer(null) { + + int lastComputedDoc = -1; + + @Override + public float score() throws IOException { + if (lastComputedDoc == docID()) { + throw new AssertionError("Score computed twice on " + docID()); + } + lastComputedDoc = docID(); + return scorer.score(); + } + + @Override + public int freq() throws IOException { + return scorer.freq(); + } + + @Override + public int docID() { + return scorer.docID(); + } + + @Override + public int nextDoc() throws IOException { + return scorer.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return scorer.advance(target); + } + + @Override + public long cost() { + return scorer.cost(); + } + + }; + super.setScorer(s); + } + }; + } + @Override + public boolean needsScores() { + return topCollector.needsScores(); + } + }; + searcher.search(query, assertingCollector); + } + } + } + reader.close(); + w.close(); + dir.close(); + } + }