refactor updateMinCompetitiveScore and ensures that the recorded min competitive score is resetted on setScorer

This commit is contained in:
jimczi 2019-10-01 12:32:23 +02:00
parent ee50a3812c
commit 7eab6ebb27
4 changed files with 195 additions and 190 deletions

View File

@ -124,7 +124,9 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
@Override
public void setScorer(Scorable scorer) throws IOException {
super.setScorer(scorer);
updateMinCompetitiveScore(scorer);
// reset the minimum competitive score
minCompetitiveScore = 0f;
updateMinCompetitiveScore(scorer, true);
}
@Override
@ -143,10 +145,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
} else {
collectedAllCompetitiveHits = true;
}
} else if (totalHitsRelation == Relation.EQUAL_TO || shouldUpdateMinScore()) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
} else {
updateMinCompetitiveScore(scorer, totalHitsRelation == Relation.EQUAL_TO);
}
return;
}
@ -155,7 +155,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
comparator.copy(bottom.slot, doc);
updateBottom(doc);
comparator.setBottom(bottom.slot);
updateMinCompetitiveScore(scorer);
updateMinCompetitiveScore(scorer, true);
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
@ -165,7 +165,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
add(slot, doc);
if (queueFull) {
comparator.setBottom(bottom.slot);
updateMinCompetitiveScore(scorer);
updateMinCompetitiveScore(scorer, true);
}
}
}
@ -214,7 +214,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
@Override
public void setScorer(Scorable scorer) throws IOException {
super.setScorer(scorer);
updateMinCompetitiveScore(scorer);
minCompetitiveScore = 0f;
updateMinCompetitiveScore(scorer, true);
}
@Override
@ -238,10 +239,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
} else {
collectedAllCompetitiveHits = true;
}
} else if (totalHitsRelation == Relation.EQUAL_TO || shouldUpdateMinScore()) {
} else {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
updateMinCompetitiveScore(scorer, totalHitsRelation == Relation.EQUAL_TO);
}
return;
}
@ -260,7 +261,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
updateBottom(doc);
comparator.setBottom(bottom.slot);
updateMinCompetitiveScore(scorer);
updateMinCompetitiveScore(scorer, true);
} else {
collectedHits++;
@ -274,7 +275,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
queueFull = collectedHits == numHits;
if (queueFull) {
comparator.setBottom(bottom.slot);
updateMinCompetitiveScore(scorer);
updateMinCompetitiveScore(scorer, true);
}
}
}
@ -290,10 +291,9 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final BottomValueChecker bottomValueChecker;
final FieldComparator.RelevanceComparator firstComparator;
final boolean canSetMinScore;
// the minimum score (if canSetMinScore is true) that is currently used by the scorer,
// can be different than pqTop.score if the provided bottomValueChecker reports a minimum
// score that is greater than the local one.
float minScore;
// the minimum competitive score (if canSetMinScore is true) that is currently
// used by the underlying scorer (see setScorer)
float minCompetitiveScore;
final int numComparators;
FieldValueHitQueue.Entry bottom = null;
boolean queueFull;
@ -335,28 +335,31 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
return scoreMode;
}
protected boolean shouldUpdateMinScore() {
return bottomValueChecker != null ? bottomValueChecker.getBottomValue() > minScore : false;
}
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
if (canSetMinScore && hitsThresholdChecker.isThresholdReached()
&& (queueFull || (bottomValueChecker != null && bottomValueChecker.getBottomValue() > 0f))) {
float maxMinScore = Float.NEGATIVE_INFINITY;
if (queueFull) {
protected void updateMinCompetitiveScore(Scorable scorer, boolean checkQueue) throws IOException {
if (canSetMinScore && hitsThresholdChecker.isThresholdReached()) {
boolean hasChanged = false;
if (checkQueue && queueFull) {
assert bottom != null && firstComparator != null;
maxMinScore = firstComparator.value(bottom.slot);
if (bottomValueChecker != null) {
bottomValueChecker.updateThreadLocalBottomValue(maxMinScore);
float localMinScore = firstComparator.value(bottom.slot);
if (localMinScore > minCompetitiveScore) {
hasChanged = true;
minCompetitiveScore = localMinScore;
if (bottomValueChecker != null) {
bottomValueChecker.updateThreadLocalBottomValue(minCompetitiveScore);
}
}
}
if (bottomValueChecker != null) {
maxMinScore = Math.max(maxMinScore, bottomValueChecker.getBottomValue());
float globalMinScore = bottomValueChecker.getBottomValue();
if (globalMinScore > minCompetitiveScore) {
hasChanged = true;
minCompetitiveScore = globalMinScore;
}
}
if (hasChanged) {
scorer.setMinCompetitiveScore(minCompetitiveScore);
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
}
assert maxMinScore > 0f;
scorer.setMinCompetitiveScore(maxMinScore);
minScore = maxMinScore;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
}
}

View File

@ -62,7 +62,9 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
@Override
public void setScorer(Scorable scorer) throws IOException {
super.setScorer(scorer);
updateMinCompetitiveScore(scorer);
// reset the minimum competitive score
minCompetitiveScore = 0f;
updateMinCompetitiveScore(scorer, true);
}
@Override
@ -76,12 +78,10 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
hitsThresholdChecker.incrementHitCount();
if (score <= pqTop.score) {
if ((totalHitsRelation == TotalHits.Relation.EQUAL_TO || shouldUpdateMinScore())
&& hitsThresholdChecker.isThresholdReached()) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
}
// the document is not competitive but we need to update the minimum competitive score
// if we just reached the total hits threshold or if the global minimum score (bottomValueChecker)
// has been updated
updateMinCompetitiveScore(scorer, totalHitsRelation == TotalHits.Relation.EQUAL_TO);
// Since docs are returned in-order (i.e., increasing doc Id), a document
// with equal score to pqTop.score cannot compete since HitQueue favors
// documents with lower doc Ids. Therefore reject those docs too.
@ -90,7 +90,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
pqTop.doc = doc + docBase;
pqTop.score = score;
pqTop = pq.updateTop();
updateMinCompetitiveScore(scorer);
updateMinCompetitiveScore(scorer, true);
}
};
@ -140,12 +140,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
if (score > after.score || (score == after.score && doc <= afterDoc)) {
// hit was collected on a previous page
if ((totalHitsRelation == TotalHits.Relation.EQUAL_TO || shouldUpdateMinScore())
&& hitsThresholdChecker.isThresholdReached()) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
}
updateMinCompetitiveScore(scorer, totalHitsRelation == TotalHits.Relation.EQUAL_TO);
return;
}
@ -159,7 +154,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
pqTop.doc = doc + docBase;
pqTop.score = score;
pqTop = pq.updateTop();
updateMinCompetitiveScore(scorer);
updateMinCompetitiveScore(scorer, true);
}
};
}
@ -249,10 +244,9 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
}
ScoreDoc pqTop;
// the minimum score that is currently used by the scorer, can be different than pqTop.score
// if the provided bottomValueChecker reports a minimum score that is greater than the local
// one.
float minScore;
// the minimum competitive score that is currently
// used by the underlying scorer (see setScorer)
float minCompetitiveScore;
final HitsThresholdChecker hitsThresholdChecker;
final BottomValueChecker bottomValueChecker;
@ -283,36 +277,44 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
return hitsThresholdChecker.scoreMode();
}
protected boolean shouldUpdateMinScore() {
return bottomValueChecker != null ? bottomValueChecker.getBottomValue() > minScore : false;
public boolean isQueueFull() {
return pqTop != null && pqTop.score != Float.NEGATIVE_INFINITY;
}
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
if (hitsThresholdChecker.isThresholdReached()
&& ((bottomValueChecker != null && bottomValueChecker.getBottomValue() > 0)
|| (pqTop != null && pqTop.score != Float.NEGATIVE_INFINITY))) { // -Infinity is the score of sentinels
// since we tie-break on doc id and collect in doc id order, we can require
// the next float
float bottomScore = Float.NEGATIVE_INFINITY;
if (pqTop != null && pqTop.score != Float.NEGATIVE_INFINITY) {
bottomScore = Math.nextUp(pqTop.score);
if (bottomValueChecker != null) {
bottomValueChecker.updateThreadLocalBottomValue(pqTop.score);
protected void updateMinCompetitiveScore(Scorable scorer, boolean checkQueue) throws IOException {
if (hitsThresholdChecker.isThresholdReached()) {
boolean hasChanged = false;
if (checkQueue && isQueueFull()) {
// since we tie-break on doc id and collect in doc id order, we can require
// the next float
float localMinScore = Math.nextUp(pqTop.score);
if (localMinScore > minCompetitiveScore) {
hasChanged = true;
minCompetitiveScore = localMinScore;
if (bottomValueChecker != null) {
// we don't use the next float here since we register a minimum value
// for other segments that might have smaller doc ids
bottomValueChecker.updateThreadLocalBottomValue(pqTop.score);
}
}
}
// Global bottom can only be greater than or equal to the local bottom score
// The updating of global bottom score for this hit before getting here should
// ensure that
if (bottomValueChecker != null && bottomValueChecker.getBottomValue() > bottomScore) {
bottomScore = bottomValueChecker.getBottomValue();
if (bottomValueChecker != null) {
float globalMinScore = bottomValueChecker.getBottomValue();
if (globalMinScore > minCompetitiveScore) {
assert isQueueFull() == false || bottomValueChecker.getBottomValue() > Math.nextUp(pqTop.score);
hasChanged = true;
minCompetitiveScore = bottomValueChecker.getBottomValue();
}
}
scorer.setMinCompetitiveScore(bottomScore);
minScore = bottomScore;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
if (hasChanged) {
scorer.setMinCompetitiveScore(minCompetitiveScore);
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
}
}
}
}

View File

@ -476,7 +476,7 @@ public class TestTopDocsCollector extends LuceneTestCase {
dir.close();
}
public void testConcurrentMinScoreTopScoreDocs() throws Exception {
public void testConcurrentMinScore() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));
Document doc = new Document();
@ -591,121 +591,4 @@ public class TestTopDocsCollector extends LuceneTestCase {
reader.close();
dir.close();
}
public void testConcurrentMinScoreTopFieldDocs() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));
Document doc = new Document();
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
w.flush();
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc, doc));
w.flush();
w.addDocuments(Arrays.asList(doc, doc));
w.flush();
IndexReader reader = DirectoryReader.open(w);
assertEquals(3, reader.leaves().size());
w.close();
Sort sort = new Sort(SortField.FIELD_SCORE, SortField.FIELD_DOC);
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, 2, null, 0);
TopFieldCollector collector = manager.newCollector();
TopFieldCollector collector2 = manager.newCollector();
assertTrue(collector.bottomValueChecker == collector2.bottomValueChecker);
BottomValueChecker minValueChecker = collector.bottomValueChecker;
ScoreAndDoc scorer = new ScoreAndDoc();
ScoreAndDoc scorer2 = new ScoreAndDoc();
LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
leafCollector.setScorer(scorer);
LeafCollector leafCollector2 = collector2.getLeafCollector(reader.leaves().get(1));
leafCollector2.setScorer(scorer2);
scorer.doc = 0;
scorer.score = 3;
leafCollector.collect(0);
scorer2.doc = 0;
scorer2.score = 6;
leafCollector2.collect(0);
scorer.doc = 1;
scorer.score = 2;
leafCollector.collect(1);
assertEquals(minValueChecker.getBottomValue(), 2f, 0f);
assertEquals(scorer.minCompetitiveScore, 2f, 0f);
assertNull(scorer2.minCompetitiveScore);
scorer2.doc = 1;
scorer2.score = 9;
leafCollector2.collect(1);
assertEquals(minValueChecker.getBottomValue(), 6f, 0f);
assertEquals(scorer.minCompetitiveScore, 2f, 0f);
assertEquals(scorer2.minCompetitiveScore, 6f, 0f);
scorer2.doc = 2;
scorer2.score = 7;
leafCollector2.collect(2);
assertEquals(minValueChecker.getBottomValue(), 7f, 0f);
assertEquals(scorer.minCompetitiveScore, 2f, 0f);
assertEquals(scorer2.minCompetitiveScore, 7f, 0f);
scorer2.doc = 3;
scorer2.score = 1;
leafCollector2.collect(3);
assertEquals(minValueChecker.getBottomValue(), 7f, 0f);
assertEquals(scorer.minCompetitiveScore, 2f, 0f);
assertEquals(scorer2.minCompetitiveScore, 7f, 0f);
scorer.doc = 2;
scorer.score = 10;
leafCollector.collect(2);
assertEquals(minValueChecker.getBottomValue(), 7f, 0f);
assertEquals(scorer.minCompetitiveScore, 7f, 0f);
assertEquals(scorer2.minCompetitiveScore, 7f, 0f);
scorer.doc = 3;
scorer.score = 11;
leafCollector.collect(3);
assertEquals(minValueChecker.getBottomValue(), 10, 0f);
assertEquals(scorer.minCompetitiveScore, 10f, 0f);
assertEquals(scorer2.minCompetitiveScore, 7f, 0f);
TopFieldCollector collector3 = manager.newCollector();
LeafCollector leafCollector3 = collector3.getLeafCollector(reader.leaves().get(2));
ScoreAndDoc scorer3 = new ScoreAndDoc();
leafCollector3.setScorer(scorer3);
assertEquals(scorer3.minCompetitiveScore, 10f, 0f);
scorer3.doc = 0;
scorer3.score = 1f;
leafCollector3.collect(0);
assertEquals(minValueChecker.getBottomValue(), 10f, 0f);
assertEquals(scorer3.minCompetitiveScore, 10f, 0f);
scorer.doc = 4;
scorer.score = 11;
leafCollector.collect(4);
assertEquals(minValueChecker.getBottomValue(), 11f, 0f);
assertEquals(scorer.minCompetitiveScore, 11f, 0f);
assertEquals(scorer2.minCompetitiveScore, 7f, 0f);
assertEquals(scorer3.minCompetitiveScore, 10f, 0f);
scorer3.doc = 1;
scorer3.score = 2f;
leafCollector3.collect(1);
assertEquals(minValueChecker.getBottomValue(), 11f, 0f);
assertEquals(scorer.minCompetitiveScore, 11f, 0f);
assertEquals(scorer2.minCompetitiveScore, 7f, 0f);
assertEquals(scorer3.minCompetitiveScore, 11f, 0f);
TopFieldDocs topDocs = manager.reduce(Arrays.asList(collector, collector2, collector3));
assertEquals(11, topDocs.totalHits.value);
assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits);
reader.close();
dir.close();
}
}

View File

@ -495,4 +495,121 @@ public class TestTopFieldCollector extends LuceneTestCase {
dir.close();
}
public void testConcurrentMinScore() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));
Document doc = new Document();
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
w.flush();
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc, doc));
w.flush();
w.addDocuments(Arrays.asList(doc, doc));
w.flush();
IndexReader reader = DirectoryReader.open(w);
assertEquals(3, reader.leaves().size());
w.close();
Sort sort = new Sort(SortField.FIELD_SCORE, SortField.FIELD_DOC);
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, 2, null, 0);
TopFieldCollector collector = manager.newCollector();
TopFieldCollector collector2 = manager.newCollector();
assertTrue(collector.bottomValueChecker == collector2.bottomValueChecker);
BottomValueChecker minValueChecker = collector.bottomValueChecker;
ScoreAndDoc scorer = new ScoreAndDoc();
ScoreAndDoc scorer2 = new ScoreAndDoc();
LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
leafCollector.setScorer(scorer);
LeafCollector leafCollector2 = collector2.getLeafCollector(reader.leaves().get(1));
leafCollector2.setScorer(scorer2);
scorer.doc = 0;
scorer.score = 3;
leafCollector.collect(0);
scorer2.doc = 0;
scorer2.score = 6;
leafCollector2.collect(0);
scorer.doc = 1;
scorer.score = 2;
leafCollector.collect(1);
assertEquals(minValueChecker.getBottomValue(), 2f, 0f);
assertEquals(scorer.minCompetitiveScore, 2f, 0f);
assertNull(scorer2.minCompetitiveScore);
scorer2.doc = 1;
scorer2.score = 9;
leafCollector2.collect(1);
assertEquals(minValueChecker.getBottomValue(), 6f, 0f);
assertEquals(scorer.minCompetitiveScore, 2f, 0f);
assertEquals(scorer2.minCompetitiveScore, 6f, 0f);
scorer2.doc = 2;
scorer2.score = 7;
leafCollector2.collect(2);
assertEquals(minValueChecker.getBottomValue(), 7f, 0f);
assertEquals(scorer.minCompetitiveScore, 2f, 0f);
assertEquals(scorer2.minCompetitiveScore, 7f, 0f);
scorer2.doc = 3;
scorer2.score = 1;
leafCollector2.collect(3);
assertEquals(minValueChecker.getBottomValue(), 7f, 0f);
assertEquals(scorer.minCompetitiveScore, 2f, 0f);
assertEquals(scorer2.minCompetitiveScore, 7f, 0f);
scorer.doc = 2;
scorer.score = 10;
leafCollector.collect(2);
assertEquals(minValueChecker.getBottomValue(), 7f, 0f);
assertEquals(scorer.minCompetitiveScore, 7f, 0f);
assertEquals(scorer2.minCompetitiveScore, 7f, 0f);
scorer.doc = 3;
scorer.score = 11;
leafCollector.collect(3);
assertEquals(minValueChecker.getBottomValue(), 10, 0f);
assertEquals(scorer.minCompetitiveScore, 10f, 0f);
assertEquals(scorer2.minCompetitiveScore, 7f, 0f);
TopFieldCollector collector3 = manager.newCollector();
LeafCollector leafCollector3 = collector3.getLeafCollector(reader.leaves().get(2));
ScoreAndDoc scorer3 = new ScoreAndDoc();
leafCollector3.setScorer(scorer3);
assertEquals(scorer3.minCompetitiveScore, 10f, 0f);
scorer3.doc = 0;
scorer3.score = 1f;
leafCollector3.collect(0);
assertEquals(minValueChecker.getBottomValue(), 10f, 0f);
assertEquals(scorer3.minCompetitiveScore, 10f, 0f);
scorer.doc = 4;
scorer.score = 11;
leafCollector.collect(4);
assertEquals(minValueChecker.getBottomValue(), 11f, 0f);
assertEquals(scorer.minCompetitiveScore, 11f, 0f);
assertEquals(scorer2.minCompetitiveScore, 7f, 0f);
assertEquals(scorer3.minCompetitiveScore, 10f, 0f);
scorer3.doc = 1;
scorer3.score = 2f;
leafCollector3.collect(1);
assertEquals(minValueChecker.getBottomValue(), 11f, 0f);
assertEquals(scorer.minCompetitiveScore, 11f, 0f);
assertEquals(scorer2.minCompetitiveScore, 7f, 0f);
assertEquals(scorer3.minCompetitiveScore, 11f, 0f);
TopFieldDocs topDocs = manager.reduce(Arrays.asList(collector, collector2, collector3));
assertEquals(11, topDocs.totalHits.value);
assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits);
reader.close();
dir.close();
}
}