Merge pull request #904 from jimczi/shared_min_score

LUCENE-8992: Share minimum score across segment in concurrent search
This commit is contained in:
Jim Ferenczi 2019-10-21 15:03:12 +02:00 committed by GitHub
commit 066d324006
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 683 additions and 128 deletions

View File

@ -55,6 +55,9 @@ Improvements
* LUCENE-8984: MoreLikeThis MLT is biased for uncommon fields (Andy Hind via Anshum Gupta)
* LUCENE-8992: TopFieldCollector and TopScoreDocCollector can now share minimum scores across leaves
concurrently. (Atri Sharma, Jim Ferenczi)
Bug fixes
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while

View File

@ -1,52 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
/**
 * Tracks a single bottom value that is shared by several collectors.
 * Collectors publish their own bottom value through
 * {@link #updateThreadLocalBottomValue(float)} and read the shared one
 * back through {@link #getBottomValue()}.
 */
abstract class BottomValueChecker {

  /**
   * Offers a collector's bottom value to the shared state.
   *
   * @param value the bottom value observed by the calling collector
   */
  public abstract void updateThreadLocalBottomValue(float value);

  /**
   * @return the bottom value currently shared across collectors
   */
  public abstract float getBottomValue();

  /**
   * @return a checker whose shared bottom value is the largest value offered so far
   */
  public static BottomValueChecker createMaxBottomScoreChecker() {
    return new MaximumBottomScoreChecker();
  }

  /** Keeps the global bottom score as the maximum of every bottom score offered. */
  private static class MaximumBottomScoreChecker extends BottomValueChecker {
    // volatile so that readers see the latest published maximum without locking
    private volatile float maxMinScore;

    @Override
    public float getBottomValue() {
      return maxMinScore;
    }

    @Override
    public void updateThreadLocalBottomValue(float value) {
      // cheap unlocked check first; only candidates that may raise the maximum take the lock
      if (value > maxMinScore) {
        synchronized (this) {
          // re-check under the lock: another thread may have raised the maximum meanwhile
          if (value > maxMinScore) {
            maxMinScore = value;
          }
        }
      }
    }
  }
}

View File

@ -470,11 +470,11 @@ public class IndexSearcher {
private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) :
HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD);
private final BottomValueChecker bottomValueChecker = BottomValueChecker.createMaxBottomScoreChecker();
private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
@Override
public TopScoreDocCollector newCollector() throws IOException {
return TopScoreDocCollector.create(cappedNumHits, after, hitsThresholdChecker, bottomValueChecker);
return TopScoreDocCollector.create(cappedNumHits, after, hitsThresholdChecker, minScoreAcc);
}
@Override
@ -598,15 +598,17 @@ public class IndexSearcher {
final int cappedNumHits = Math.min(numHits, limit);
final Sort rewrittenSort = sort.rewrite(this);
final CollectorManager<TopFieldCollector, TopFieldDocs> manager = new CollectorManager<TopFieldCollector, TopFieldDocs>() {
final CollectorManager<TopFieldCollector, TopFieldDocs> manager = new CollectorManager<>() {
private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) :
HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD);
private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
@Override
public TopFieldCollector newCollector() throws IOException {
// TODO: don't pay the price for accurate hit counts by default
return TopFieldCollector.create(rewrittenSort, cappedNumHits, after, hitsThresholdChecker);
return TopFieldCollector.create(rewrittenSort, cappedNumHits, after, hitsThresholdChecker, minScoreAcc);
}
@Override

View File

@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.util.concurrent.atomic.LongAccumulator;
/**
* Maintains the maximum score and its corresponding document id concurrently
*/
final class MaxScoreAccumulator {
// we use 2^10-1 to check the remainder with a bitwise operation
static final int DEFAULT_INTERVAL = 0x3ff;
// scores are always positive
final LongAccumulator acc = new LongAccumulator(Long::max, Long.MIN_VALUE);
// non-final and visible for tests
long modInterval;
MaxScoreAccumulator() {
this.modInterval = DEFAULT_INTERVAL;
}
void accumulate(int docID, float score) {
assert docID >= 0 && score >= 0;
long encode = (((long) Float.floatToIntBits(score)) << 32) | docID;
acc.accumulate(encode);
}
DocAndScore get() {
long value = acc.get();
if (value == Long.MIN_VALUE) {
return null;
}
float score = Float.intBitsToFloat((int) (value >> 32));
int docID = (int) value;
return new DocAndScore(docID, score);
}
static class DocAndScore implements Comparable<DocAndScore> {
final int docID;
final float score;
DocAndScore(int docID, float score) {
this.docID = docID;
this.score = score;
}
@Override
public int compareTo(DocAndScore o) {
int cmp = Float.compare(score, o.score);
if (cmp == 0) {
return Integer.compare(docID, o.docID);
}
return cmp;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
DocAndScore result = (DocAndScore) o;
return docID == result.docID &&
Float.compare(result.score, score) == 0;
}
@Override
public String toString() {
return "DocAndScore{" +
"docID=" + docID +
", score=" + score +
'}';
}
}
}

View File

@ -27,6 +27,7 @@ import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.search.MaxScoreAccumulator.DocAndScore;
import org.apache.lucene.search.TotalHits.Relation;
/**
@ -101,8 +102,9 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final FieldValueHitQueue<Entry> queue;
public SimpleFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits,
HitsThresholdChecker hitsThresholdChecker) {
super(queue, numHits, hitsThresholdChecker, sort.needsScores());
HitsThresholdChecker hitsThresholdChecker,
MaxScoreAccumulator minScoreAcc) {
super(queue, numHits, hitsThresholdChecker, sort.needsScores(), minScoreAcc);
this.sort = sort;
this.queue = queue;
}
@ -123,13 +125,22 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
@Override
public void setScorer(Scorable scorer) throws IOException {
super.setScorer(scorer);
minCompetitiveScore = 0f;
updateMinCompetitiveScore(scorer);
if (minScoreAcc != null) {
updateGlobalMinCompetitiveScore(scorer);
}
}
@Override
public void collect(int doc) throws IOException {
++totalHits;
hitsThresholdChecker.incrementHitCount();
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
}
if (queueFull) {
if (collectedAllCompetitiveHits || reverseMul * comparator.compareBottom(doc) <= 0) {
// since docs are visited in doc Id order, if compare is 0, it means
@ -143,8 +154,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
collectedAllCompetitiveHits = true;
}
} else if (totalHitsRelation == Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
// we can start setting the min competitive score if the
// threshold is reached for the first time here.
updateMinCompetitiveScore(scorer);
}
return;
@ -185,8 +196,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final FieldDoc after;
public PagingFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, FieldDoc after, int numHits,
HitsThresholdChecker hitsThresholdChecker) {
super(queue, numHits, hitsThresholdChecker, sort.needsScores());
HitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
super(queue, numHits, hitsThresholdChecker, sort.needsScores(), minScoreAcc);
this.sort = sort;
this.queue = queue;
this.after = after;
@ -213,7 +224,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
@Override
public void setScorer(Scorable scorer) throws IOException {
super.setScorer(scorer);
minCompetitiveScore = 0f;
updateMinCompetitiveScore(scorer);
if (minScoreAcc != null) {
updateGlobalMinCompetitiveScore(scorer);
}
}
@Override
@ -223,6 +238,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
totalHits++;
hitsThresholdChecker.incrementHitCount();
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
}
if (queueFull) {
// Fastmatch: return if this hit is no better than
// the worst hit currently in the queue:
@ -237,7 +256,9 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
} else {
collectedAllCompetitiveHits = true;
}
} else if (totalHitsRelation == Relation.GREATER_THAN_OR_EQUAL_TO) {
} else if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we can start setting the min competitive score if the
// threshold is reached for the first time here.
updateMinCompetitiveScore(scorer);
}
return;
@ -247,6 +268,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final int topCmp = reverseMul * comparator.compareTop(doc);
if (topCmp > 0 || (topCmp == 0 && doc <= afterDoc)) {
// Already collected on a previous page
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
}
return;
}
@ -286,6 +312,12 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final HitsThresholdChecker hitsThresholdChecker;
final FieldComparator.RelevanceComparator firstComparator;
final boolean canSetMinScore;
// an accumulator that maintains the maximum of the segment's minimum competitive scores
final MaxScoreAccumulator minScoreAcc;
// the current local minimum competitive score already propagated to the underlying scorer
float minCompetitiveScore;
final int numComparators;
FieldValueHitQueue.Entry bottom = null;
boolean queueFull;
@ -299,7 +331,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
// visibility, then anyone will be able to extend the class, which is not what
// we want.
private TopFieldCollector(FieldValueHitQueue<Entry> pq, int numHits,
HitsThresholdChecker hitsThresholdChecker, boolean needsScores) {
HitsThresholdChecker hitsThresholdChecker, boolean needsScores,
MaxScoreAccumulator minScoreAcc) {
super(pq);
this.needsScores = needsScores;
this.numHits = numHits;
@ -318,6 +351,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
canSetMinScore = false;
}
this.minScoreAcc = minScoreAcc;
}
@Override
@ -325,12 +359,36 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
return scoreMode;
}
protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
assert minScoreAcc != null;
if (canSetMinScore
&& hitsThresholdChecker.isThresholdReached()) {
// we can start checking the global maximum score even
// if the local queue is not full because the threshold
// is reached.
DocAndScore maxMinScore = minScoreAcc.get();
if (maxMinScore != null && maxMinScore.score > minCompetitiveScore) {
scorer.setMinCompetitiveScore(maxMinScore.score);
minCompetitiveScore = maxMinScore.score;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
}
}
}
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
if (canSetMinScore && hitsThresholdChecker.isThresholdReached() && queueFull) {
if (canSetMinScore
&& queueFull
&& hitsThresholdChecker.isThresholdReached()) {
assert bottom != null && firstComparator != null;
float minScore = firstComparator.value(bottom.slot);
if (minScore > minCompetitiveScore) {
scorer.setMinCompetitiveScore(minScore);
minCompetitiveScore = minScore;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
if (minScoreAcc != null) {
minScoreAcc.accumulate(bottom.doc, minScore);
}
}
}
}
@ -389,14 +447,14 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
throw new IllegalArgumentException("totalHitsThreshold must be >= 0, got " + totalHitsThreshold);
}
return create(sort, numHits, after, HitsThresholdChecker.create(totalHitsThreshold));
return create(sort, numHits, after, HitsThresholdChecker.create(totalHitsThreshold), null /* bottomValueChecker */);
}
/**
* Same as above with an additional parameter to allow passing in the threshold checker
* Same as above with additional parameters to allow passing in the threshold checker and the max score accumulator.
*/
static TopFieldCollector create(Sort sort, int numHits, FieldDoc after,
HitsThresholdChecker hitsThresholdChecker) {
HitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
if (sort.fields.length == 0) {
throw new IllegalArgumentException("Sort must contain at least one field");
@ -413,7 +471,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
if (after == null) {
return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker);
return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc);
} else {
if (after.fields == null) {
throw new IllegalArgumentException("after.fields wasn't set; you must pass fillFields=true for the previous search");
@ -423,22 +481,25 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
throw new IllegalArgumentException("after.fields has " + after.fields.length + " values but sort has " + sort.getSort().length);
}
return new PagingFieldCollector(sort, queue, after, numHits, hitsThresholdChecker);
return new PagingFieldCollector(sort, queue, after, numHits, hitsThresholdChecker, minScoreAcc);
}
}
/**
* Create a CollectorManager which uses a shared hit counter to maintain number of hits
* and a shared {@link MaxScoreAccumulator} to propagate the minimum score across segments if
* the primary sort is by relevancy.
*/
public static CollectorManager<TopFieldCollector, TopFieldDocs> createSharedManager(Sort sort, int numHits, FieldDoc after,
int totalHitsThreshold) {
return new CollectorManager<>() {
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold);
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
@Override
public TopFieldCollector newCollector() throws IOException {
return create(sort, numHits, after, hitsThresholdChecker);
return create(sort, numHits, after, hitsThresholdChecker, minScoreAcc);
}
@Override

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.MaxScoreAccumulator.DocAndScore;
/**
* A {@link Collector} implementation that collects the top-scoring hits,
@ -50,19 +51,24 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
private static class SimpleTopScoreDocCollector extends TopScoreDocCollector {
SimpleTopScoreDocCollector(int numHits, HitsThresholdChecker hitsThresholdChecker,
BottomValueChecker bottomValueChecker) {
super(numHits, hitsThresholdChecker, bottomValueChecker);
MaxScoreAccumulator minScoreAcc) {
super(numHits, hitsThresholdChecker, minScoreAcc);
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
final int docBase = context.docBase;
// reset the minimum competitive score
docBase = context.docBase;
return new ScorerLeafCollector() {
@Override
public void setScorer(Scorable scorer) throws IOException {
super.setScorer(scorer);
minCompetitiveScore = 0f;
updateMinCompetitiveScore(scorer);
if (minScoreAcc != null) {
updateGlobalMinCompetitiveScore(scorer);
}
}
@Override
@ -75,8 +81,12 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
totalHits++;
hitsThresholdChecker.incrementHitCount();
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
}
if (score <= pqTop.score) {
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO && hitsThresholdChecker.isThresholdReached()) {
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
@ -102,8 +112,8 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
private int collectedHits;
PagingTopScoreDocCollector(int numHits, ScoreDoc after, HitsThresholdChecker hitsThresholdChecker,
BottomValueChecker bottomValueChecker) {
super(numHits, hitsThresholdChecker, bottomValueChecker);
MaxScoreAccumulator minScoreAcc) {
super(numHits, hitsThresholdChecker, minScoreAcc);
this.after = after;
this.collectedHits = 0;
}
@ -123,7 +133,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
final int docBase = context.docBase;
docBase = context.docBase;
final int afterDoc = after.doc - context.docBase;
return new ScorerLeafCollector() {
@ -137,9 +147,13 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
totalHits++;
hitsThresholdChecker.incrementHitCount();
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
}
if (score > after.score || (score == after.score && doc <= afterDoc)) {
// hit was collected on a previous page
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO && hitsThresholdChecker.isThresholdReached()) {
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
@ -148,6 +162,12 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
}
if (score <= pqTop.score) {
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
}
// Since docs are returned in-order (i.e., increasing doc Id), a document
// with equal score to pqTop.score cannot compete since HitQueue favors
// documents with lower doc Ids. Therefore reject those docs too.
@ -201,7 +221,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
}
static TopScoreDocCollector create(int numHits, ScoreDoc after, HitsThresholdChecker hitsThresholdChecker,
BottomValueChecker bottomValueChecker) {
MaxScoreAccumulator minScoreAcc) {
if (numHits <= 0) {
throw new IllegalArgumentException("numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count");
@ -212,25 +232,26 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
}
if (after == null) {
return new SimpleTopScoreDocCollector(numHits, hitsThresholdChecker, bottomValueChecker);
return new SimpleTopScoreDocCollector(numHits, hitsThresholdChecker, minScoreAcc);
} else {
return new PagingTopScoreDocCollector(numHits, after, hitsThresholdChecker, bottomValueChecker);
return new PagingTopScoreDocCollector(numHits, after, hitsThresholdChecker, minScoreAcc);
}
}
/**
* Create a CollectorManager which uses a shared hit counter to maintain number of hits
* and a shared {@link MaxScoreAccumulator} to propagate the minimum score across segments
*/
public static CollectorManager<TopScoreDocCollector, TopDocs> createSharedManager(int numHits, FieldDoc after,
int totalHitsThreshold) {
return new CollectorManager<>() {
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold);
private final BottomValueChecker bottomValueChecker = BottomValueChecker.createMaxBottomScoreChecker();
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
@Override
public TopScoreDocCollector newCollector() throws IOException {
return TopScoreDocCollector.create(numHits, after, hitsThresholdChecker, bottomValueChecker);
return TopScoreDocCollector.create(numHits, after, hitsThresholdChecker, minScoreAcc);
}
@Override
@ -246,13 +267,15 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
};
}
int docBase;
ScoreDoc pqTop;
final HitsThresholdChecker hitsThresholdChecker;
final BottomValueChecker bottomValueChecker;
final MaxScoreAccumulator minScoreAcc;
float minCompetitiveScore;
// prevents instantiation
TopScoreDocCollector(int numHits, HitsThresholdChecker hitsThresholdChecker,
BottomValueChecker bottomValueChecker) {
MaxScoreAccumulator minScoreAcc) {
super(new HitQueue(numHits, true));
assert hitsThresholdChecker != null;
@ -260,7 +283,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
// that at this point top() is already initialized.
pqTop = pq.top();
this.hitsThresholdChecker = hitsThresholdChecker;
this.bottomValueChecker = bottomValueChecker;
this.minScoreAcc = minScoreAcc;
}
@Override
@ -277,31 +300,41 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
return hitsThresholdChecker.scoreMode();
}
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
if (hitsThresholdChecker.isThresholdReached()
&& ((bottomValueChecker != null && bottomValueChecker.getBottomValue() > 0)
|| (pqTop != null && pqTop.score != Float.NEGATIVE_INFINITY))) { // -Infinity is the score of sentinels
// since we tie-break on doc id and collect in doc id order, we can require
// the next float
float bottomScore = Float.NEGATIVE_INFINITY;
if (pqTop != null && pqTop.score != Float.NEGATIVE_INFINITY) {
bottomScore = Math.nextUp(pqTop.score);
if (bottomValueChecker != null) {
bottomValueChecker.updateThreadLocalBottomValue(pqTop.score);
}
}
// Global bottom can only be greater than or equal to the local bottom score
// The updating of global bottom score for this hit before getting here should
// ensure that
if (bottomValueChecker != null && bottomValueChecker.getBottomValue() > bottomScore) {
bottomScore = bottomValueChecker.getBottomValue();
}
scorer.setMinCompetitiveScore(bottomScore);
protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
assert minScoreAcc != null;
DocAndScore maxMinScore = minScoreAcc.get();
if (maxMinScore != null) {
// since we tie-break on doc id and collect in doc id order we can require
// the next float if the global minimum score is set on a document id that is
// smaller than the ids in the current leaf
float score = docBase > maxMinScore.docID ? Math.nextUp(maxMinScore.score) : maxMinScore.score;
if (score > minCompetitiveScore) {
assert hitsThresholdChecker.isThresholdReached();
scorer.setMinCompetitiveScore(score);
minCompetitiveScore = score;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
}
}
}
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
if (hitsThresholdChecker.isThresholdReached()
&& pqTop != null
&& pqTop.score != Float.NEGATIVE_INFINITY) { // -Infinity is the score of sentinels
// since we tie-break on doc id and collect in doc id order, we can require
// the next float
float localMinScore = Math.nextUp(pqTop.score);
if (localMinScore > minCompetitiveScore) {
scorer.setMinCompetitiveScore(localMinScore);
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
minCompetitiveScore = localMinScore;
if (minScoreAcc != null) {
// we don't use the next float but we register the document
// id so that other leaves can require it if they are after
// the current maximum
minScoreAcc.accumulate(pqTop.doc, pqTop.score);
}
}
}
}
}

View File

@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
/** Unit tests for {@link MaxScoreAccumulator}. */
public class TestMaxScoreAccumulator extends LuceneTestCase {

  /** Checks that the accumulator currently holds the expected score and document id. */
  private static void assertTop(MaxScoreAccumulator acc, float expectedScore, int expectedDocID) {
    assertEquals(expectedScore, acc.get().score, 0);
    assertEquals(expectedDocID, acc.get().docID, 0);
  }

  /** Walks through a fixed sequence covering ties, lower scores and a new maximum. */
  public void testSimple() {
    MaxScoreAccumulator acc = new MaxScoreAccumulator();

    // equal scores: the larger document id wins
    acc.accumulate(0, 0f);
    acc.accumulate(10, 0f);
    assertTop(acc, 0f, 10);

    // a higher score replaces the current maximum
    acc.accumulate(100, 1000f);
    assertTop(acc, 1000f, 100);

    // a lower score is ignored, even with a larger document id
    acc.accumulate(1000, 5f);
    assertTop(acc, 1000f, 100);

    // same score but smaller document id is ignored
    acc.accumulate(99, 1000f);
    assertTop(acc, 1000f, 100);

    // a higher score wins regardless of its document id
    acc.accumulate(0, 1001f);
    assertTop(acc, 1001f, 0);
  }

  /** Feeds random pairs and compares the result with a maximum tracked via compareTo. */
  public void testRandom() {
    MaxScoreAccumulator acc = new MaxScoreAccumulator();
    int numDocs = atLeast(100);
    int maxDocs = atLeast(10000);
    MaxScoreAccumulator.DocAndScore expected = new MaxScoreAccumulator.DocAndScore(-1, -1);
    int remaining = numDocs;
    while (remaining-- > 0) {
      // draw the document id before the score to keep the random sequence unchanged
      int docID = random().nextInt(maxDocs);
      float score = random().nextFloat();
      MaxScoreAccumulator.DocAndScore candidate = new MaxScoreAccumulator.DocAndScore(docID, score);
      acc.accumulate(candidate.docID, candidate.score);
      if (candidate.compareTo(expected) > 0) {
        expected = candidate;
      }
    }
    assertEquals(expected, acc.get());
  }
}

View File

@ -25,6 +25,8 @@ import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@ -112,29 +114,27 @@ public class TestTopDocsCollector extends LuceneTestCase {
return tdc;
}
private TopDocsCollector<ScoreDoc> doSearchWithThreshold(int numResults, int thresHold) throws IOException {
Query q = new MatchAllDocsQuery();
IndexSearcher searcher = newSearcher(reader);
private TopDocsCollector<ScoreDoc> doSearchWithThreshold(int numResults, int thresHold, Query q, IndexReader indexReader) throws IOException {
IndexSearcher searcher = new IndexSearcher(indexReader);
TopDocsCollector<ScoreDoc> tdc = TopScoreDocCollector.create(numResults, thresHold);
searcher.search(q, tdc);
return tdc;
}
private TopDocs doConcurrentSearchWithThreshold(int numResults, int threshold, IndexReader reader) throws IOException {
Query q = new MatchAllDocsQuery();
private TopDocs doConcurrentSearchWithThreshold(int numResults, int threshold, Query q, IndexReader indexReader) throws IOException {
ExecutorService service = new ThreadPoolExecutor(4, 4, 0L, TimeUnit.MILLISECONDS,
new LinkedBlockingQueue<Runnable>(),
new NamedThreadFactory("TestTopDocsCollector"));
IndexSearcher searcher = new IndexSearcher(reader, service);
try {
IndexSearcher searcher = new IndexSearcher(indexReader, service);
CollectorManager collectorManager = TopScoreDocCollector.createSharedManager(numResults,
null, threshold);
TopDocs tdc = (TopDocs) searcher.search(q, collectorManager);
return (TopDocs) searcher.search(q, collectorManager);
} finally {
service.shutdown();
return tdc;
}
}
@Override
@ -344,8 +344,8 @@ public class TestTopDocsCollector extends LuceneTestCase {
assertEquals(2, reader.leaves().size());
w.close();
TopDocsCollector collector = doSearchWithThreshold(5, 10);
TopDocs tdc = doConcurrentSearchWithThreshold(5, 10, reader);
TopDocsCollector collector = doSearchWithThreshold( 5, 10, q, reader);
TopDocs tdc = doConcurrentSearchWithThreshold(5, 10, q, reader);
TopDocs tdc2 = collector.topDocs();
CheckHits.checkEqual(q, tdc.scoreDocs, tdc2.scoreDocs);
@ -402,7 +402,174 @@ public class TestTopDocsCollector extends LuceneTestCase {
dir.close();
}
public void testGlobalScore() throws Exception {
/**
 * Drives three collectors obtained from one shared {@code TopScoreDocCollector} manager by hand,
 * one leaf each, and checks after every collected hit both the value held by the shared
 * {@code MaxScoreAccumulator} and the minimum competitive score each scorer has been given.
 *
 * <p>The manager is created for the top 2 hits with a total-hits threshold of 0. The expected
 * values below are exactly the ones the assertions pin; a scorer only observes a new minimum when
 * its own collector collects a hit (or when a new leaf collector receives its scorer).
 */
public void testConcurrentMinScore() throws Exception {
  Directory dir = newDirectory();
  // NoMergePolicy keeps each flush as its own segment, so the reader exposes exactly three leaves.
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));
  Document doc = new Document();
  w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
  w.flush();
  w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc, doc));
  w.flush();
  w.addDocuments(Arrays.asList(doc, doc));
  w.flush();
  IndexReader reader = DirectoryReader.open(w);
  assertEquals(3, reader.leaves().size());
  w.close();

  CollectorManager<TopScoreDocCollector, TopDocs> manager =
      TopScoreDocCollector.createSharedManager(2, null, 0);
  TopScoreDocCollector collector = manager.newCollector();
  TopScoreDocCollector collector2 = manager.newCollector();
  // Both collectors must hold the very same accumulator instance, not merely equal ones.
  assertSame(collector.minScoreAcc, collector2.minScoreAcc);
  MaxScoreAccumulator minValueChecker = collector.minScoreAcc;
  // force the check of the global minimum score on every round
  minValueChecker.modInterval = 0;

  ScoreAndDoc scorer = new ScoreAndDoc();
  ScoreAndDoc scorer2 = new ScoreAndDoc();

  LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
  leafCollector.setScorer(scorer);
  LeafCollector leafCollector2 = collector2.getLeafCollector(reader.leaves().get(1));
  leafCollector2.setScorer(scorer2);

  // First hit of collector 1: nothing is published yet.
  scorer.doc = 0;
  scorer.score = 3;
  leafCollector.collect(0);
  assertNull(minValueChecker.get());
  assertNull(scorer.minCompetitiveScore);

  // First hit of collector 2: still nothing published.
  scorer2.doc = 0;
  scorer2.score = 6;
  leafCollector2.collect(0);
  assertNull(minValueChecker.get());
  assertNull(scorer2.minCompetitiveScore);

  // Second hit of collector 1 (scores 3 and 2 collected): the accumulator now reports 2.
  scorer.doc = 1;
  scorer.score = 2;
  leafCollector.collect(1);
  assertEquals(2f, minValueChecker.get().score, 0f);
  assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
  assertNull(scorer2.minCompetitiveScore);

  // Second hit of collector 2 (scores 6 and 9 collected): the accumulator rises to 6.
  scorer2.doc = 1;
  scorer2.score = 9;
  leafCollector2.collect(1);
  assertEquals(6f, minValueChecker.get().score, 0f);
  assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
  assertEquals(Math.nextUp(6f), scorer2.minCompetitiveScore, 0f);

  // A 7 in collector 2 raises the accumulator to 7; scorer 1 has not collected since, so it
  // still holds its previous bound.
  scorer2.doc = 2;
  scorer2.score = 7;
  leafCollector2.collect(2);
  assertEquals(7f, minValueChecker.get().score, 0f);
  assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
  assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);

  // A low-scoring hit in collector 2 changes nothing.
  scorer2.doc = 3;
  scorer2.score = 1;
  leafCollector2.collect(3);
  assertEquals(7f, minValueChecker.get().score, 0f);
  assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
  assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);

  // Collector 1 collects again and picks up the shared value: note the expected bound here is
  // exactly 7, not nextUp(7).
  scorer.doc = 2;
  scorer.score = 10;
  leafCollector.collect(2);
  assertEquals(7f, minValueChecker.get().score, 0f);
  assertEquals(7f, scorer.minCompetitiveScore, 0f);
  assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);

  // Collector 1 now has 10 and 11 collected; the accumulator rises to 10.
  scorer.doc = 3;
  scorer.score = 11;
  leafCollector.collect(3);
  assertEquals(10f, minValueChecker.get().score, 0f);
  assertEquals(Math.nextUp(10f), scorer.minCompetitiveScore, 0f);
  assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);

  // A collector created late sees the shared minimum as soon as its scorer is set.
  TopScoreDocCollector collector3 = manager.newCollector();
  LeafCollector leafCollector3 = collector3.getLeafCollector(reader.leaves().get(2));
  ScoreAndDoc scorer3 = new ScoreAndDoc();
  leafCollector3.setScorer(scorer3);
  assertEquals(Math.nextUp(10f), scorer3.minCompetitiveScore, 0f);

  scorer3.doc = 0;
  scorer3.score = 1f;
  leafCollector3.collect(0);
  assertEquals(10f, minValueChecker.get().score, 0f);
  assertEquals(Math.nextUp(10f), scorer3.minCompetitiveScore, 0f);

  // Another 11 in collector 1 raises the accumulator to 11; the other scorers keep their old
  // bounds until their collectors collect again.
  scorer.doc = 4;
  scorer.score = 11;
  leafCollector.collect(4);
  assertEquals(11f, minValueChecker.get().score, 0f);
  assertEquals(Math.nextUp(11f), scorer.minCompetitiveScore, 0f);
  assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
  assertEquals(Math.nextUp(10f), scorer3.minCompetitiveScore, 0f);

  // Collector 3 collects and catches up with the shared value.
  scorer3.doc = 1;
  scorer3.score = 2f;
  leafCollector3.collect(1);
  assertEquals(11f, minValueChecker.get().score, 0f);
  assertEquals(Math.nextUp(11f), scorer.minCompetitiveScore, 0f);
  assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
  assertEquals(Math.nextUp(11f), scorer3.minCompetitiveScore, 0f);

  // 5 + 4 + 2 collect() calls were made above, hence 11 hits reported as a lower bound.
  TopDocs topDocs = manager.reduce(Arrays.asList(collector, collector2, collector3));
  assertEquals(11, topDocs.totalHits.value);
  assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits);

  reader.close();
  dir.close();
}
/**
 * Indexes at least 1000 random documents whose field {@code f} holds repeated "A", "B" and "C"
 * values, then checks for several queries that the concurrent shared-manager search returns the
 * same top 5 hits as the plain single-collector search (both with a threshold of 0).
 */
public void testRandomMinCompetitiveScore() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, newIndexWriterConfig());

  final String[] values = {"A", "B", "C"};
  final int docCount = atLeast(1000);
  for (int docIndex = 0; docIndex < docCount; docIndex++) {
    // All three counts are drawn before any field is added: "A" always occurs 1-5 times,
    // "B" is absent when its draw is below 0.5, "C" is absent when its draw is below 0.1.
    final int[] counts = new int[3];
    counts[0] = 1 + random().nextInt(5);
    if (random().nextFloat() >= 0.5f) {
      counts[1] = 1 + random().nextInt(5);
    }
    if (random().nextFloat() >= 0.1f) {
      counts[2] = 1 + random().nextInt(5);
    }

    Document document = new Document();
    for (int v = 0; v < values.length; v++) {
      int remaining = counts[v];
      while (remaining-- > 0) {
        document.add(new StringField("f", values[v], Field.Store.NO));
      }
    }
    indexWriter.addDocument(document);
  }

  IndexReader indexReader = indexWriter.getReader();
  indexWriter.close();

  Query[] queries = new Query[]{
      new TermQuery(new Term("f", "A")),
      new TermQuery(new Term("f", "B")),
      new TermQuery(new Term("f", "C")),
      new BooleanQuery.Builder()
          .add(new TermQuery(new Term("f", "A")), BooleanClause.Occur.MUST)
          .add(new TermQuery(new Term("f", "B")), BooleanClause.Occur.SHOULD)
          .build()
  };

  for (int queryIndex = 0; queryIndex < queries.length; queryIndex++) {
    final Query query = queries[queryIndex];
    TopDocsCollector sequentialCollector = doSearchWithThreshold(5, 0, query, indexReader);
    TopDocs concurrentTopDocs = doConcurrentSearchWithThreshold(5, 0, query, indexReader);
    TopDocs sequentialTopDocs = sequentialCollector.topDocs();

    // Both searches must have matched something before the hit lists are compared.
    assertTrue(concurrentTopDocs.totalHits.value > 0);
    assertTrue(sequentialTopDocs.totalHits.value > 0);
    CheckHits.checkEqual(query, concurrentTopDocs.scoreDocs, sequentialTopDocs.scoreDocs);
  }

  indexReader.close();
  directory.close();
}
public void testRealisticConcurrentMinimumScore() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
try (LineFileDocs docs = new LineFileDocs(random())) {
@ -432,8 +599,8 @@ public class TestTopDocsCollector extends LuceneTestCase {
BytesRef term = BytesRef.deepCopyOf(termsEnum.term());
Query query = new TermQuery(new Term("body", term));
TopDocsCollector collector = doSearchWithThreshold(5, 10);
TopDocs tdc = doConcurrentSearchWithThreshold(5, 10, reader);
TopDocsCollector collector = doSearchWithThreshold(5, 0, query, reader);
TopDocs tdc = doConcurrentSearchWithThreshold(5, 0, query, reader);
TopDocs tdc2 = collector.topDocs();
CheckHits.checkEqual(query, tdc.scoreDocs, tdc2.scoreDocs);
@ -443,5 +610,4 @@ public class TestTopDocsCollector extends LuceneTestCase {
reader.close();
dir.close();
}
}

View File

@ -26,6 +26,7 @@ import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
@ -73,6 +74,31 @@ public class TestTopFieldCollector extends LuceneTestCase {
super.tearDown();
}
/**
 * Runs {@code q} on a plain (executor-less) {@link IndexSearcher} over {@code indexReader} and
 * returns the collector that gathered the hits.
 *
 * @param numResults number of hits passed to {@code TopFieldCollector.create}
 * @param thresHold threshold passed as the third argument to {@code TopFieldCollector.create}
 * @param q query to execute
 * @param sort sort order used by the collector
 * @param indexReader reader to search
 * @return the collector after the search has completed
 */
private TopFieldCollector doSearchWithThreshold(int numResults, int thresHold, Query q, Sort sort, IndexReader indexReader) throws IOException {
  final IndexSearcher sequentialSearcher = new IndexSearcher(indexReader);
  final TopFieldCollector collector = TopFieldCollector.create(sort, numResults, thresHold);
  sequentialSearcher.search(q, collector);
  return collector;
}
/**
 * Runs {@code q} on an {@link IndexSearcher} backed by a dedicated four-thread executor, collecting
 * through the manager returned by {@code TopFieldCollector.createSharedManager}.
 *
 * <p>The executor is shut down in a {@code finally} block so it is released even when the search
 * throws.
 *
 * @param numResults number of hits passed to the shared manager
 * @param threshold threshold passed as the last argument to {@code createSharedManager}
 * @param q query to execute
 * @param sort sort order used by the collectors
 * @param indexReader reader to search
 * @return the reduced top docs of the concurrent search
 */
private TopDocs doConcurrentSearchWithThreshold(int numResults, int threshold, Query q, Sort sort, IndexReader indexReader) throws IOException {
  ExecutorService service = new ThreadPoolExecutor(4, 4, 0L, TimeUnit.MILLISECONDS,
      new LinkedBlockingQueue<Runnable>(),
      new NamedThreadFactory("TestTopFieldCollector"));
  try {
    IndexSearcher searcher = new IndexSearcher(indexReader, service);
    // Typed manager: the search result is already a TopFieldDocs, so no raw type or cast is needed.
    // NOTE(review): the null argument is presumably the "search after" document - confirm.
    CollectorManager<TopFieldCollector, TopFieldDocs> collectorManager =
        TopFieldCollector.createSharedManager(sort, numResults, null, threshold);
    return searcher.search(q, collectorManager);
  } finally {
    service.shutdown();
  }
}
public void testSortWithoutFillFields() throws Exception {
// There was previously a bug in TopFieldCollector when fillFields was set
@ -495,4 +521,173 @@ public class TestTopFieldCollector extends LuceneTestCase {
dir.close();
}
/**
 * Drives three collectors obtained from one shared {@code TopFieldCollector} manager by hand, one
 * leaf each, and checks after every collected hit both the value held by the shared
 * {@code MaxScoreAccumulator} and the minimum competitive score each scorer has been given.
 *
 * <p>The manager sorts by score then doc, keeps the top 2 hits and uses a total-hits threshold of
 * 0. The expected values below are exactly the ones the assertions pin; a scorer only observes a
 * new minimum when its own collector collects a hit (or when a new leaf collector receives its
 * scorer).
 */
public void testConcurrentMinScore() throws Exception {
  Directory dir = newDirectory();
  // NoMergePolicy keeps each flush as its own segment, so the reader exposes exactly three leaves.
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));
  Document doc = new Document();
  w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
  w.flush();
  w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc, doc));
  w.flush();
  w.addDocuments(Arrays.asList(doc, doc));
  w.flush();
  IndexReader reader = DirectoryReader.open(w);
  assertEquals(3, reader.leaves().size());
  w.close();

  Sort sort = new Sort(SortField.FIELD_SCORE, SortField.FIELD_DOC);
  CollectorManager<TopFieldCollector, TopFieldDocs> manager =
      TopFieldCollector.createSharedManager(sort, 2, null, 0);
  TopFieldCollector collector = manager.newCollector();
  TopFieldCollector collector2 = manager.newCollector();
  // Both collectors must hold the very same accumulator instance, not merely equal ones.
  assertSame(collector.minScoreAcc, collector2.minScoreAcc);
  MaxScoreAccumulator minValueChecker = collector.minScoreAcc;
  // force the check of the global minimum score on every round
  minValueChecker.modInterval = 0;

  ScoreAndDoc scorer = new ScoreAndDoc();
  ScoreAndDoc scorer2 = new ScoreAndDoc();

  LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
  leafCollector.setScorer(scorer);
  LeafCollector leafCollector2 = collector2.getLeafCollector(reader.leaves().get(1));
  leafCollector2.setScorer(scorer2);

  // First hit of collector 1: nothing is published yet.
  scorer.doc = 0;
  scorer.score = 3;
  leafCollector.collect(0);
  assertNull(minValueChecker.get());
  assertNull(scorer.minCompetitiveScore);

  // First hit of collector 2: still nothing published.
  scorer2.doc = 0;
  scorer2.score = 6;
  leafCollector2.collect(0);
  assertNull(minValueChecker.get());
  assertNull(scorer2.minCompetitiveScore);

  // Second hit of collector 1 (scores 3 and 2 collected): the accumulator now reports 2.
  scorer.doc = 1;
  scorer.score = 2;
  leafCollector.collect(1);
  assertEquals(2f, minValueChecker.get().score, 0f);
  assertEquals(2f, scorer.minCompetitiveScore, 0f);
  assertNull(scorer2.minCompetitiveScore);

  // Second hit of collector 2 (scores 6 and 9 collected): the accumulator rises to 6.
  scorer2.doc = 1;
  scorer2.score = 9;
  leafCollector2.collect(1);
  assertEquals(6f, minValueChecker.get().score, 0f);
  assertEquals(2f, scorer.minCompetitiveScore, 0f);
  assertEquals(6f, scorer2.minCompetitiveScore, 0f);

  // A 7 in collector 2 raises the accumulator to 7; scorer 1 has not collected since, so it
  // still holds its previous bound.
  scorer2.doc = 2;
  scorer2.score = 7;
  leafCollector2.collect(2);
  assertEquals(7f, minValueChecker.get().score, 0f);
  assertEquals(2f, scorer.minCompetitiveScore, 0f);
  assertEquals(7f, scorer2.minCompetitiveScore, 0f);

  // A low-scoring hit in collector 2 changes nothing.
  scorer2.doc = 3;
  scorer2.score = 1;
  leafCollector2.collect(3);
  assertEquals(7f, minValueChecker.get().score, 0f);
  assertEquals(2f, scorer.minCompetitiveScore, 0f);
  assertEquals(7f, scorer2.minCompetitiveScore, 0f);

  // Collector 1 collects again and picks up the shared value of 7.
  scorer.doc = 2;
  scorer.score = 10;
  leafCollector.collect(2);
  assertEquals(7f, minValueChecker.get().score, 0f);
  assertEquals(7f, scorer.minCompetitiveScore, 0f);
  assertEquals(7f, scorer2.minCompetitiveScore, 0f);

  // Collector 1 now has 10 and 11 collected; the accumulator rises to 10.
  scorer.doc = 3;
  scorer.score = 11;
  leafCollector.collect(3);
  assertEquals(10f, minValueChecker.get().score, 0f);
  assertEquals(10f, scorer.minCompetitiveScore, 0f);
  assertEquals(7f, scorer2.minCompetitiveScore, 0f);

  // A collector created late sees the shared minimum as soon as its scorer is set.
  TopFieldCollector collector3 = manager.newCollector();
  LeafCollector leafCollector3 = collector3.getLeafCollector(reader.leaves().get(2));
  ScoreAndDoc scorer3 = new ScoreAndDoc();
  leafCollector3.setScorer(scorer3);
  assertEquals(10f, scorer3.minCompetitiveScore, 0f);

  scorer3.doc = 0;
  scorer3.score = 1f;
  leafCollector3.collect(0);
  assertEquals(10f, minValueChecker.get().score, 0f);
  assertEquals(10f, scorer3.minCompetitiveScore, 0f);

  // Another 11 in collector 1 raises the accumulator to 11; the other scorers keep their old
  // bounds until their collectors collect again.
  scorer.doc = 4;
  scorer.score = 11;
  leafCollector.collect(4);
  assertEquals(11f, minValueChecker.get().score, 0f);
  assertEquals(11f, scorer.minCompetitiveScore, 0f);
  assertEquals(7f, scorer2.minCompetitiveScore, 0f);
  assertEquals(10f, scorer3.minCompetitiveScore, 0f);

  // Collector 3 collects and catches up with the shared value.
  scorer3.doc = 1;
  scorer3.score = 2f;
  leafCollector3.collect(1);
  assertEquals(11f, minValueChecker.get().score, 0f);
  assertEquals(11f, scorer.minCompetitiveScore, 0f);
  assertEquals(7f, scorer2.minCompetitiveScore, 0f);
  assertEquals(11f, scorer3.minCompetitiveScore, 0f);

  // 5 + 4 + 2 collect() calls were made above, hence 11 hits reported as a lower bound.
  TopFieldDocs topDocs = manager.reduce(Arrays.asList(collector, collector2, collector3));
  assertEquals(11, topDocs.totalHits.value);
  assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits);

  reader.close();
  dir.close();
}
/**
 * Indexes at least 1000 random documents whose field {@code f} holds repeated "A", "B" and "C"
 * values, then checks for several queries, sorted by score then doc, that the concurrent
 * shared-manager search returns the same top 5 hits as the plain single-collector search (both
 * with a threshold of 0).
 */
public void testRandomMinCompetitiveScore() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, newIndexWriterConfig());

  final String[] values = {"A", "B", "C"};
  final int docCount = atLeast(1000);
  for (int docIndex = 0; docIndex < docCount; docIndex++) {
    // All three counts are drawn before any field is added: "A" always occurs 1-5 times,
    // "B" is absent when its draw is below 0.5, "C" is absent when its draw is below 0.1.
    final int[] counts = new int[3];
    counts[0] = 1 + random().nextInt(5);
    if (random().nextFloat() >= 0.5f) {
      counts[1] = 1 + random().nextInt(5);
    }
    if (random().nextFloat() >= 0.1f) {
      counts[2] = 1 + random().nextInt(5);
    }

    Document document = new Document();
    for (int v = 0; v < values.length; v++) {
      int remaining = counts[v];
      while (remaining-- > 0) {
        document.add(new StringField("f", values[v], Field.Store.NO));
      }
    }
    indexWriter.addDocument(document);
  }

  IndexReader indexReader = indexWriter.getReader();
  indexWriter.close();

  Query[] queries = new Query[]{
      new TermQuery(new Term("f", "A")),
      new TermQuery(new Term("f", "B")),
      new TermQuery(new Term("f", "C")),
      new BooleanQuery.Builder()
          .add(new TermQuery(new Term("f", "A")), BooleanClause.Occur.MUST)
          .add(new TermQuery(new Term("f", "B")), BooleanClause.Occur.SHOULD)
          .build()
  };

  for (int queryIndex = 0; queryIndex < queries.length; queryIndex++) {
    final Query query = queries[queryIndex];
    final Sort scoreThenDoc = new Sort(SortField.FIELD_SCORE, SortField.FIELD_DOC);
    TopFieldCollector sequentialCollector = doSearchWithThreshold(5, 0, query, scoreThenDoc, indexReader);
    TopDocs concurrentTopDocs = doConcurrentSearchWithThreshold(5, 0, query, scoreThenDoc, indexReader);
    TopDocs sequentialTopDocs = sequentialCollector.topDocs();

    // Both searches must have matched something before the hit lists are compared.
    assertTrue(concurrentTopDocs.totalHits.value > 0);
    assertTrue(sequentialTopDocs.totalHits.value > 0);
    CheckHits.checkEqual(query, concurrentTopDocs.scoreDocs, sequentialTopDocs.scoreDocs);
  }

  indexReader.close();
  directory.close();
}
}