mirror of https://github.com/apache/lucene.git
Merge pull request #904 from jimczi/shared_min_score
LUCENE-8992: Share minimum score across segment in concurrent search
This commit is contained in:
commit
066d324006
|
@ -55,6 +55,9 @@ Improvements
|
|||
|
||||
* LUCENE-8984: MoreLikeThis MLT is biased for uncommon fields (Andy Hind via Anshum Gupta)
|
||||
|
||||
* LUCENE-8992: TopFieldCollector and TopScoreDocCollector can now share minimum scores across leaves
|
||||
concurrently. (Atri Sharma, Jim Ferenczi)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
|
||||
|
|
|
@ -1,52 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search;
|
||||
|
||||
/**
 * Tracks a single bottom value that several collectors contribute to.
 */
abstract class BottomValueChecker {

  /**
   * Offers a candidate bottom value to this checker.
   *
   * @param value the bottom value observed by the calling collector
   */
  public abstract void updateThreadLocalBottomValue(float value);

  /**
   * @return the bottom value currently held by this checker
   */
  public abstract float getBottomValue();

  /**
   * @return a new checker that retains the largest value it has been offered
   */
  public static BottomValueChecker createMaxBottomScoreChecker() {
    return new MaximumBottomScoreChecker();
  }

  /** Keeps the largest bottom score offered so far; starts at {@code 0f}. */
  private static class MaximumBottomScoreChecker extends BottomValueChecker {
    // volatile so the unsynchronized reads below observe the latest write
    private volatile float highestBottom;

    @Override
    public float getBottomValue() {
      return highestBottom;
    }

    @Override
    public void updateThreadLocalBottomValue(float value) {
      // cheap unsynchronized pre-check: most offers do not raise the maximum
      if (value > highestBottom) {
        synchronized (this) {
          // re-check under the lock, another thread may have raised it meanwhile
          if (value > highestBottom) {
            highestBottom = value;
          }
        }
      }
    }
  }
}
|
|
@ -470,11 +470,11 @@ public class IndexSearcher {
|
|||
private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) :
|
||||
HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD);
|
||||
|
||||
private final BottomValueChecker bottomValueChecker = BottomValueChecker.createMaxBottomScoreChecker();
|
||||
private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
|
||||
|
||||
@Override
|
||||
public TopScoreDocCollector newCollector() throws IOException {
|
||||
return TopScoreDocCollector.create(cappedNumHits, after, hitsThresholdChecker, bottomValueChecker);
|
||||
return TopScoreDocCollector.create(cappedNumHits, after, hitsThresholdChecker, minScoreAcc);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -598,15 +598,17 @@ public class IndexSearcher {
|
|||
final int cappedNumHits = Math.min(numHits, limit);
|
||||
final Sort rewrittenSort = sort.rewrite(this);
|
||||
|
||||
final CollectorManager<TopFieldCollector, TopFieldDocs> manager = new CollectorManager<TopFieldCollector, TopFieldDocs>() {
|
||||
final CollectorManager<TopFieldCollector, TopFieldDocs> manager = new CollectorManager<>() {
|
||||
|
||||
private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) :
|
||||
HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD);
|
||||
|
||||
private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
|
||||
|
||||
@Override
|
||||
public TopFieldCollector newCollector() throws IOException {
|
||||
// TODO: don't pay the price for accurate hit counts by default
|
||||
return TopFieldCollector.create(rewrittenSort, cappedNumHits, after, hitsThresholdChecker);
|
||||
return TopFieldCollector.create(rewrittenSort, cappedNumHits, after, hitsThresholdChecker, minScoreAcc);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import java.util.concurrent.atomic.LongAccumulator;
|
||||
|
||||
/**
|
||||
* Maintains the maximum score and its corresponding document id concurrently
|
||||
*/
|
||||
final class MaxScoreAccumulator {
|
||||
// we use 2^10-1 to check the remainder with a bitwise operation
|
||||
static final int DEFAULT_INTERVAL = 0x3ff;
|
||||
|
||||
// scores are always positive
|
||||
final LongAccumulator acc = new LongAccumulator(Long::max, Long.MIN_VALUE);
|
||||
|
||||
// non-final and visible for tests
|
||||
long modInterval;
|
||||
|
||||
MaxScoreAccumulator() {
|
||||
this.modInterval = DEFAULT_INTERVAL;
|
||||
}
|
||||
|
||||
void accumulate(int docID, float score) {
|
||||
assert docID >= 0 && score >= 0;
|
||||
long encode = (((long) Float.floatToIntBits(score)) << 32) | docID;
|
||||
acc.accumulate(encode);
|
||||
}
|
||||
|
||||
DocAndScore get() {
|
||||
long value = acc.get();
|
||||
if (value == Long.MIN_VALUE) {
|
||||
return null;
|
||||
}
|
||||
float score = Float.intBitsToFloat((int) (value >> 32));
|
||||
int docID = (int) value;
|
||||
return new DocAndScore(docID, score);
|
||||
}
|
||||
|
||||
static class DocAndScore implements Comparable<DocAndScore> {
|
||||
final int docID;
|
||||
final float score;
|
||||
|
||||
DocAndScore(int docID, float score) {
|
||||
this.docID = docID;
|
||||
this.score = score;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(DocAndScore o) {
|
||||
int cmp = Float.compare(score, o.score);
|
||||
if (cmp == 0) {
|
||||
return Integer.compare(docID, o.docID);
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
DocAndScore result = (DocAndScore) o;
|
||||
return docID == result.docID &&
|
||||
Float.compare(result.score, score) == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "DocAndScore{" +
|
||||
"docID=" + docID +
|
||||
", score=" + score +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
}
|
|
@ -27,6 +27,7 @@ import java.util.Objects;
|
|||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.search.FieldValueHitQueue.Entry;
|
||||
import org.apache.lucene.search.MaxScoreAccumulator.DocAndScore;
|
||||
import org.apache.lucene.search.TotalHits.Relation;
|
||||
|
||||
/**
|
||||
|
@ -101,8 +102,9 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
final FieldValueHitQueue<Entry> queue;
|
||||
|
||||
public SimpleFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits,
|
||||
HitsThresholdChecker hitsThresholdChecker) {
|
||||
super(queue, numHits, hitsThresholdChecker, sort.needsScores());
|
||||
HitsThresholdChecker hitsThresholdChecker,
|
||||
MaxScoreAccumulator minScoreAcc) {
|
||||
super(queue, numHits, hitsThresholdChecker, sort.needsScores(), minScoreAcc);
|
||||
this.sort = sort;
|
||||
this.queue = queue;
|
||||
}
|
||||
|
@ -123,13 +125,22 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
super.setScorer(scorer);
|
||||
minCompetitiveScore = 0f;
|
||||
updateMinCompetitiveScore(scorer);
|
||||
if (minScoreAcc != null) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
++totalHits;
|
||||
hitsThresholdChecker.incrementHitCount();
|
||||
|
||||
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
|
||||
if (queueFull) {
|
||||
if (collectedAllCompetitiveHits || reverseMul * comparator.compareBottom(doc) <= 0) {
|
||||
// since docs are visited in doc Id order, if compare is 0, it means
|
||||
|
@ -143,8 +154,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
collectedAllCompetitiveHits = true;
|
||||
}
|
||||
} else if (totalHitsRelation == Relation.EQUAL_TO) {
|
||||
// we just reached totalHitsThreshold, we can start setting the min
|
||||
// competitive score now
|
||||
// we can start setting the min competitive score if the
|
||||
// threshold is reached for the first time here.
|
||||
updateMinCompetitiveScore(scorer);
|
||||
}
|
||||
return;
|
||||
|
@ -185,8 +196,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
final FieldDoc after;
|
||||
|
||||
public PagingFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, FieldDoc after, int numHits,
|
||||
HitsThresholdChecker hitsThresholdChecker) {
|
||||
super(queue, numHits, hitsThresholdChecker, sort.needsScores());
|
||||
HitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
|
||||
super(queue, numHits, hitsThresholdChecker, sort.needsScores(), minScoreAcc);
|
||||
this.sort = sort;
|
||||
this.queue = queue;
|
||||
this.after = after;
|
||||
|
@ -213,7 +224,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
super.setScorer(scorer);
|
||||
minCompetitiveScore = 0f;
|
||||
updateMinCompetitiveScore(scorer);
|
||||
if (minScoreAcc != null) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -223,6 +238,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
totalHits++;
|
||||
hitsThresholdChecker.incrementHitCount();
|
||||
|
||||
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
|
||||
if (queueFull) {
|
||||
// Fastmatch: return if this hit is no better than
|
||||
// the worst hit currently in the queue:
|
||||
|
@ -237,8 +256,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
} else {
|
||||
collectedAllCompetitiveHits = true;
|
||||
}
|
||||
} else if (totalHitsRelation == Relation.GREATER_THAN_OR_EQUAL_TO) {
|
||||
updateMinCompetitiveScore(scorer);
|
||||
} else if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
||||
// we can start setting the min competitive score if the
|
||||
// threshold is reached for the first time here.
|
||||
updateMinCompetitiveScore(scorer);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -247,6 +268,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
final int topCmp = reverseMul * comparator.compareTop(doc);
|
||||
if (topCmp > 0 || (topCmp == 0 && doc <= afterDoc)) {
|
||||
// Already collected on a previous page
|
||||
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
||||
// we just reached totalHitsThreshold, we can start setting the min
|
||||
// competitive score now
|
||||
updateMinCompetitiveScore(scorer);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -286,6 +312,12 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
final HitsThresholdChecker hitsThresholdChecker;
|
||||
final FieldComparator.RelevanceComparator firstComparator;
|
||||
final boolean canSetMinScore;
|
||||
|
||||
// an accumulator that maintains the maximum of the segment's minimum competitive scores
|
||||
final MaxScoreAccumulator minScoreAcc;
|
||||
// the current local minimum competitive score already propagated to the underlying scorer
|
||||
float minCompetitiveScore;
|
||||
|
||||
final int numComparators;
|
||||
FieldValueHitQueue.Entry bottom = null;
|
||||
boolean queueFull;
|
||||
|
@ -299,7 +331,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
// visibility, then anyone will be able to extend the class, which is not what
|
||||
// we want.
|
||||
private TopFieldCollector(FieldValueHitQueue<Entry> pq, int numHits,
|
||||
HitsThresholdChecker hitsThresholdChecker, boolean needsScores) {
|
||||
HitsThresholdChecker hitsThresholdChecker, boolean needsScores,
|
||||
MaxScoreAccumulator minScoreAcc) {
|
||||
super(pq);
|
||||
this.needsScores = needsScores;
|
||||
this.numHits = numHits;
|
||||
|
@ -318,6 +351,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
|
||||
canSetMinScore = false;
|
||||
}
|
||||
this.minScoreAcc = minScoreAcc;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -325,12 +359,36 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
return scoreMode;
|
||||
}
|
||||
|
||||
protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
|
||||
assert minScoreAcc != null;
|
||||
if (canSetMinScore
|
||||
&& hitsThresholdChecker.isThresholdReached()) {
|
||||
// we can start checking the global maximum score even
|
||||
// if the local queue is not full because the threshold
|
||||
// is reached.
|
||||
DocAndScore maxMinScore = minScoreAcc.get();
|
||||
if (maxMinScore != null && maxMinScore.score > minCompetitiveScore) {
|
||||
scorer.setMinCompetitiveScore(maxMinScore.score);
|
||||
minCompetitiveScore = maxMinScore.score;
|
||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
|
||||
if (canSetMinScore && hitsThresholdChecker.isThresholdReached() && queueFull) {
|
||||
if (canSetMinScore
|
||||
&& queueFull
|
||||
&& hitsThresholdChecker.isThresholdReached()) {
|
||||
assert bottom != null && firstComparator != null;
|
||||
float minScore = firstComparator.value(bottom.slot);
|
||||
scorer.setMinCompetitiveScore(minScore);
|
||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
if (minScore > minCompetitiveScore) {
|
||||
scorer.setMinCompetitiveScore(minScore);
|
||||
minCompetitiveScore = minScore;
|
||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
if (minScoreAcc != null) {
|
||||
minScoreAcc.accumulate(bottom.doc, minScore);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -389,14 +447,14 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
throw new IllegalArgumentException("totalHitsThreshold must be >= 0, got " + totalHitsThreshold);
|
||||
}
|
||||
|
||||
return create(sort, numHits, after, HitsThresholdChecker.create(totalHitsThreshold));
|
||||
return create(sort, numHits, after, HitsThresholdChecker.create(totalHitsThreshold), null /* bottomValueChecker */);
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as above with an additional parameter to allow passing in the threshold checker
|
||||
* Same as above with additional parameters to allow passing in the threshold checker and the max score accumulator.
|
||||
*/
|
||||
static TopFieldCollector create(Sort sort, int numHits, FieldDoc after,
|
||||
HitsThresholdChecker hitsThresholdChecker) {
|
||||
HitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
|
||||
|
||||
if (sort.fields.length == 0) {
|
||||
throw new IllegalArgumentException("Sort must contain at least one field");
|
||||
|
@ -413,7 +471,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
|
||||
|
||||
if (after == null) {
|
||||
return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker);
|
||||
return new SimpleFieldCollector(sort, queue, numHits, hitsThresholdChecker, minScoreAcc);
|
||||
} else {
|
||||
if (after.fields == null) {
|
||||
throw new IllegalArgumentException("after.fields wasn't set; you must pass fillFields=true for the previous search");
|
||||
|
@ -423,22 +481,25 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
throw new IllegalArgumentException("after.fields has " + after.fields.length + " values but sort has " + sort.getSort().length);
|
||||
}
|
||||
|
||||
return new PagingFieldCollector(sort, queue, after, numHits, hitsThresholdChecker);
|
||||
return new PagingFieldCollector(sort, queue, after, numHits, hitsThresholdChecker, minScoreAcc);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a CollectorManager which uses a shared hit counter to maintain number of hits
|
||||
* and a shared {@link MaxScoreAccumulator} to propagate the minimum score across segments if
|
||||
* the primary sort is by relevancy.
|
||||
*/
|
||||
public static CollectorManager<TopFieldCollector, TopFieldDocs> createSharedManager(Sort sort, int numHits, FieldDoc after,
|
||||
int totalHitsThreshold) {
|
||||
return new CollectorManager<>() {
|
||||
|
||||
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold);
|
||||
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
|
||||
|
||||
@Override
|
||||
public TopFieldCollector newCollector() throws IOException {
|
||||
return create(sort, numHits, after, hitsThresholdChecker);
|
||||
return create(sort, numHits, after, hitsThresholdChecker, minScoreAcc);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.MaxScoreAccumulator.DocAndScore;
|
||||
|
||||
/**
|
||||
* A {@link Collector} implementation that collects the top-scoring hits,
|
||||
|
@ -50,19 +51,24 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
private static class SimpleTopScoreDocCollector extends TopScoreDocCollector {
|
||||
|
||||
SimpleTopScoreDocCollector(int numHits, HitsThresholdChecker hitsThresholdChecker,
|
||||
BottomValueChecker bottomValueChecker) {
|
||||
super(numHits, hitsThresholdChecker, bottomValueChecker);
|
||||
MaxScoreAccumulator minScoreAcc) {
|
||||
super(numHits, hitsThresholdChecker, minScoreAcc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
final int docBase = context.docBase;
|
||||
// reset the minimum competitive score
|
||||
docBase = context.docBase;
|
||||
return new ScorerLeafCollector() {
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
super.setScorer(scorer);
|
||||
minCompetitiveScore = 0f;
|
||||
updateMinCompetitiveScore(scorer);
|
||||
if (minScoreAcc != null) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -75,8 +81,12 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
totalHits++;
|
||||
hitsThresholdChecker.incrementHitCount();
|
||||
|
||||
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
|
||||
if (score <= pqTop.score) {
|
||||
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO && hitsThresholdChecker.isThresholdReached()) {
|
||||
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
||||
// we just reached totalHitsThreshold, we can start setting the min
|
||||
// competitive score now
|
||||
updateMinCompetitiveScore(scorer);
|
||||
|
@ -102,8 +112,8 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
private int collectedHits;
|
||||
|
||||
PagingTopScoreDocCollector(int numHits, ScoreDoc after, HitsThresholdChecker hitsThresholdChecker,
|
||||
BottomValueChecker bottomValueChecker) {
|
||||
super(numHits, hitsThresholdChecker, bottomValueChecker);
|
||||
MaxScoreAccumulator minScoreAcc) {
|
||||
super(numHits, hitsThresholdChecker, minScoreAcc);
|
||||
this.after = after;
|
||||
this.collectedHits = 0;
|
||||
}
|
||||
|
@ -123,7 +133,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
|
||||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
final int docBase = context.docBase;
|
||||
docBase = context.docBase;
|
||||
final int afterDoc = after.doc - context.docBase;
|
||||
|
||||
return new ScorerLeafCollector() {
|
||||
|
@ -137,9 +147,13 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
totalHits++;
|
||||
hitsThresholdChecker.incrementHitCount();
|
||||
|
||||
if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
|
||||
updateGlobalMinCompetitiveScore(scorer);
|
||||
}
|
||||
|
||||
if (score > after.score || (score == after.score && doc <= afterDoc)) {
|
||||
// hit was collected on a previous page
|
||||
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO && hitsThresholdChecker.isThresholdReached()) {
|
||||
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
||||
// we just reached totalHitsThreshold, we can start setting the min
|
||||
// competitive score now
|
||||
updateMinCompetitiveScore(scorer);
|
||||
|
@ -148,6 +162,12 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
}
|
||||
|
||||
if (score <= pqTop.score) {
|
||||
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
|
||||
// we just reached totalHitsThreshold, we can start setting the min
|
||||
// competitive score now
|
||||
updateMinCompetitiveScore(scorer);
|
||||
}
|
||||
|
||||
// Since docs are returned in-order (i.e., increasing doc Id), a document
|
||||
// with equal score to pqTop.score cannot compete since HitQueue favors
|
||||
// documents with lower doc Ids. Therefore reject those docs too.
|
||||
|
@ -201,7 +221,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
}
|
||||
|
||||
static TopScoreDocCollector create(int numHits, ScoreDoc after, HitsThresholdChecker hitsThresholdChecker,
|
||||
BottomValueChecker bottomValueChecker) {
|
||||
MaxScoreAccumulator minScoreAcc) {
|
||||
|
||||
if (numHits <= 0) {
|
||||
throw new IllegalArgumentException("numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count");
|
||||
|
@ -212,25 +232,26 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
}
|
||||
|
||||
if (after == null) {
|
||||
return new SimpleTopScoreDocCollector(numHits, hitsThresholdChecker, bottomValueChecker);
|
||||
return new SimpleTopScoreDocCollector(numHits, hitsThresholdChecker, minScoreAcc);
|
||||
} else {
|
||||
return new PagingTopScoreDocCollector(numHits, after, hitsThresholdChecker, bottomValueChecker);
|
||||
return new PagingTopScoreDocCollector(numHits, after, hitsThresholdChecker, minScoreAcc);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a CollectorManager which uses a shared hit counter to maintain number of hits
|
||||
* and a shared {@link MaxScoreAccumulator} to propagate the minimum score across segments
|
||||
*/
|
||||
public static CollectorManager<TopScoreDocCollector, TopDocs> createSharedManager(int numHits, FieldDoc after,
|
||||
int totalHitsThreshold) {
|
||||
return new CollectorManager<>() {
|
||||
|
||||
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold);
|
||||
private final BottomValueChecker bottomValueChecker = BottomValueChecker.createMaxBottomScoreChecker();
|
||||
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
|
||||
|
||||
@Override
|
||||
public TopScoreDocCollector newCollector() throws IOException {
|
||||
return TopScoreDocCollector.create(numHits, after, hitsThresholdChecker, bottomValueChecker);
|
||||
return TopScoreDocCollector.create(numHits, after, hitsThresholdChecker, minScoreAcc);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -246,13 +267,15 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
};
|
||||
}
|
||||
|
||||
int docBase;
|
||||
ScoreDoc pqTop;
|
||||
final HitsThresholdChecker hitsThresholdChecker;
|
||||
final BottomValueChecker bottomValueChecker;
|
||||
final MaxScoreAccumulator minScoreAcc;
|
||||
float minCompetitiveScore;
|
||||
|
||||
// prevents instantiation
|
||||
TopScoreDocCollector(int numHits, HitsThresholdChecker hitsThresholdChecker,
|
||||
BottomValueChecker bottomValueChecker) {
|
||||
MaxScoreAccumulator minScoreAcc) {
|
||||
super(new HitQueue(numHits, true));
|
||||
assert hitsThresholdChecker != null;
|
||||
|
||||
|
@ -260,7 +283,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
// that at this point top() is already initialized.
|
||||
pqTop = pq.top();
|
||||
this.hitsThresholdChecker = hitsThresholdChecker;
|
||||
this.bottomValueChecker = bottomValueChecker;
|
||||
this.minScoreAcc = minScoreAcc;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -277,31 +300,41 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
return hitsThresholdChecker.scoreMode();
|
||||
}
|
||||
|
||||
protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
|
||||
assert minScoreAcc != null;
|
||||
DocAndScore maxMinScore = minScoreAcc.get();
|
||||
if (maxMinScore != null) {
|
||||
// since we tie-break on doc id and collect in doc id order we can require
|
||||
// the next float if the global minimum score is set on a document id that is
|
||||
// smaller than the ids in the current leaf
|
||||
float score = docBase > maxMinScore.docID ? Math.nextUp(maxMinScore.score) : maxMinScore.score;
|
||||
if (score > minCompetitiveScore) {
|
||||
assert hitsThresholdChecker.isThresholdReached();
|
||||
scorer.setMinCompetitiveScore(score);
|
||||
minCompetitiveScore = score;
|
||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
|
||||
if (hitsThresholdChecker.isThresholdReached()
|
||||
&& ((bottomValueChecker != null && bottomValueChecker.getBottomValue() > 0)
|
||||
|| (pqTop != null && pqTop.score != Float.NEGATIVE_INFINITY))) { // -Infinity is the score of sentinels
|
||||
&& pqTop != null
|
||||
&& pqTop.score != Float.NEGATIVE_INFINITY) { // -Infinity is the score of sentinels
|
||||
// since we tie-break on doc id and collect in doc id order, we can require
|
||||
// the next float
|
||||
float bottomScore = Float.NEGATIVE_INFINITY;
|
||||
|
||||
if (pqTop != null && pqTop.score != Float.NEGATIVE_INFINITY) {
|
||||
bottomScore = Math.nextUp(pqTop.score);
|
||||
|
||||
if (bottomValueChecker != null) {
|
||||
bottomValueChecker.updateThreadLocalBottomValue(pqTop.score);
|
||||
float localMinScore = Math.nextUp(pqTop.score);
|
||||
if (localMinScore > minCompetitiveScore) {
|
||||
scorer.setMinCompetitiveScore(localMinScore);
|
||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
minCompetitiveScore = localMinScore;
|
||||
if (minScoreAcc != null) {
|
||||
// we don't use the next float but we register the document
|
||||
// id so that other leaves can require it if they are after
|
||||
// the current maximum
|
||||
minScoreAcc.accumulate(pqTop.doc, pqTop.score);
|
||||
}
|
||||
}
|
||||
|
||||
// Global bottom can only be greater than or equal to the local bottom score
|
||||
// The updating of global bottom score for this hit before getting here should
|
||||
// ensure that
|
||||
if (bottomValueChecker != null && bottomValueChecker.getBottomValue() > bottomScore) {
|
||||
bottomScore = bottomValueChecker.getBottomValue();
|
||||
}
|
||||
|
||||
scorer.setMinCompetitiveScore(bottomScore);
|
||||
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestMaxScoreAccumulator extends LuceneTestCase {
|
||||
public void testSimple() {
|
||||
MaxScoreAccumulator acc = new MaxScoreAccumulator();
|
||||
acc.accumulate(0, 0f);
|
||||
acc.accumulate(10, 0f);
|
||||
assertEquals(0f, acc.get().score, 0);
|
||||
assertEquals(10, acc.get().docID, 0);
|
||||
acc.accumulate(100, 1000f);
|
||||
assertEquals(1000f, acc.get().score, 0);
|
||||
assertEquals(100, acc.get().docID, 0);
|
||||
acc.accumulate(1000, 5f);
|
||||
assertEquals(1000f, acc.get().score, 0);
|
||||
assertEquals(100, acc.get().docID, 0);
|
||||
acc.accumulate(99, 1000f);
|
||||
assertEquals(1000f, acc.get().score, 0);
|
||||
assertEquals(100, acc.get().docID, 0);
|
||||
acc.accumulate(0, 1001f);
|
||||
assertEquals(1001f, acc.get().score, 0);
|
||||
assertEquals(0, acc.get().docID, 0);
|
||||
}
|
||||
|
||||
public void testRandom() {
|
||||
MaxScoreAccumulator acc = new MaxScoreAccumulator();
|
||||
int numDocs = atLeast(100);
|
||||
int maxDocs = atLeast(10000);
|
||||
MaxScoreAccumulator.DocAndScore max = new MaxScoreAccumulator.DocAndScore(-1, -1);
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
MaxScoreAccumulator.DocAndScore res = new MaxScoreAccumulator.DocAndScore(random().nextInt(maxDocs), random().nextFloat());
|
||||
acc.accumulate(res.docID, res.score);
|
||||
if (res.compareTo(max) > 0) {
|
||||
max = res;
|
||||
}
|
||||
}
|
||||
assertEquals(max, acc.get());
|
||||
}
|
||||
}
|
|
@ -25,6 +25,8 @@ import java.util.concurrent.ThreadPoolExecutor;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
@ -112,29 +114,27 @@ public class TestTopDocsCollector extends LuceneTestCase {
|
|||
return tdc;
|
||||
}
|
||||
|
||||
private TopDocsCollector<ScoreDoc> doSearchWithThreshold(int numResults, int thresHold) throws IOException {
|
||||
Query q = new MatchAllDocsQuery();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
private TopDocsCollector<ScoreDoc> doSearchWithThreshold(int numResults, int thresHold, Query q, IndexReader indexReader) throws IOException {
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
TopDocsCollector<ScoreDoc> tdc = TopScoreDocCollector.create(numResults, thresHold);
|
||||
searcher.search(q, tdc);
|
||||
return tdc;
|
||||
}
|
||||
|
||||
private TopDocs doConcurrentSearchWithThreshold(int numResults, int threshold, IndexReader reader) throws IOException {
|
||||
Query q = new MatchAllDocsQuery();
|
||||
private TopDocs doConcurrentSearchWithThreshold(int numResults, int threshold, Query q, IndexReader indexReader) throws IOException {
|
||||
ExecutorService service = new ThreadPoolExecutor(4, 4, 0L, TimeUnit.MILLISECONDS,
|
||||
new LinkedBlockingQueue<Runnable>(),
|
||||
new NamedThreadFactory("TestTopDocsCollector"));
|
||||
IndexSearcher searcher = new IndexSearcher(reader, service);
|
||||
try {
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader, service);
|
||||
|
||||
CollectorManager collectorManager = TopScoreDocCollector.createSharedManager(numResults,
|
||||
null, threshold);
|
||||
CollectorManager collectorManager = TopScoreDocCollector.createSharedManager(numResults,
|
||||
null, threshold);
|
||||
|
||||
TopDocs tdc = (TopDocs) searcher.search(q, collectorManager);
|
||||
|
||||
service.shutdown();
|
||||
|
||||
return tdc;
|
||||
return (TopDocs) searcher.search(q, collectorManager);
|
||||
} finally {
|
||||
service.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -344,8 +344,8 @@ public class TestTopDocsCollector extends LuceneTestCase {
|
|||
assertEquals(2, reader.leaves().size());
|
||||
w.close();
|
||||
|
||||
TopDocsCollector collector = doSearchWithThreshold(5, 10);
|
||||
TopDocs tdc = doConcurrentSearchWithThreshold(5, 10, reader);
|
||||
TopDocsCollector collector = doSearchWithThreshold( 5, 10, q, reader);
|
||||
TopDocs tdc = doConcurrentSearchWithThreshold(5, 10, q, reader);
|
||||
TopDocs tdc2 = collector.topDocs();
|
||||
|
||||
CheckHits.checkEqual(q, tdc.scoreDocs, tdc2.scoreDocs);
|
||||
|
@ -402,7 +402,174 @@ public class TestTopDocsCollector extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testGlobalScore() throws Exception {
|
||||
public void testConcurrentMinScore() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));
|
||||
Document doc = new Document();
|
||||
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
|
||||
w.flush();
|
||||
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc, doc));
|
||||
w.flush();
|
||||
w.addDocuments(Arrays.asList(doc, doc));
|
||||
w.flush();
|
||||
IndexReader reader = DirectoryReader.open(w);
|
||||
assertEquals(3, reader.leaves().size());
|
||||
w.close();
|
||||
|
||||
CollectorManager<TopScoreDocCollector, TopDocs> manager =
|
||||
TopScoreDocCollector.createSharedManager(2, null, 0);
|
||||
TopScoreDocCollector collector = manager.newCollector();
|
||||
TopScoreDocCollector collector2 = manager.newCollector();
|
||||
assertTrue(collector.minScoreAcc == collector2.minScoreAcc);
|
||||
MaxScoreAccumulator minValueChecker = collector.minScoreAcc;
|
||||
// force the check of the global minimum score on every round
|
||||
minValueChecker.modInterval = 0;
|
||||
|
||||
ScoreAndDoc scorer = new ScoreAndDoc();
|
||||
ScoreAndDoc scorer2 = new ScoreAndDoc();
|
||||
|
||||
LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
|
||||
leafCollector.setScorer(scorer);
|
||||
LeafCollector leafCollector2 = collector2.getLeafCollector(reader.leaves().get(1));
|
||||
leafCollector2.setScorer(scorer2);
|
||||
|
||||
scorer.doc = 0;
|
||||
scorer.score = 3;
|
||||
leafCollector.collect(0);
|
||||
assertNull(minValueChecker.get());
|
||||
assertNull(scorer.minCompetitiveScore);
|
||||
|
||||
scorer2.doc = 0;
|
||||
scorer2.score = 6;
|
||||
leafCollector2.collect(0);
|
||||
assertNull(minValueChecker.get());
|
||||
assertNull(scorer2.minCompetitiveScore);
|
||||
|
||||
scorer.doc = 1;
|
||||
scorer.score = 2;
|
||||
leafCollector.collect(1);
|
||||
assertEquals(2f, minValueChecker.get().score, 0f);
|
||||
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
|
||||
assertNull(scorer2.minCompetitiveScore);
|
||||
|
||||
scorer2.doc = 1;
|
||||
scorer2.score = 9;
|
||||
leafCollector2.collect(1);
|
||||
assertEquals(6f, minValueChecker.get().score, 0f);
|
||||
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(Math.nextUp(6f), scorer2.minCompetitiveScore, 0f);
|
||||
|
||||
scorer2.doc = 2;
|
||||
scorer2.score = 7;
|
||||
leafCollector2.collect(2);
|
||||
assertEquals(minValueChecker.get().score, 7f, 0f);
|
||||
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
|
||||
|
||||
scorer2.doc = 3;
|
||||
scorer2.score = 1;
|
||||
leafCollector2.collect(3);
|
||||
assertEquals(minValueChecker.get().score, 7f, 0f);
|
||||
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
|
||||
|
||||
scorer.doc = 2;
|
||||
scorer.score = 10;
|
||||
leafCollector.collect(2);
|
||||
assertEquals(minValueChecker.get().score, 7f, 0f);
|
||||
assertEquals(7f, scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
|
||||
|
||||
scorer.doc = 3;
|
||||
scorer.score = 11;
|
||||
leafCollector.collect(3);
|
||||
assertEquals(minValueChecker.get().score, 10, 0f);
|
||||
assertEquals(Math.nextUp(10f), scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
|
||||
|
||||
TopScoreDocCollector collector3 = manager.newCollector();
|
||||
LeafCollector leafCollector3 = collector3.getLeafCollector(reader.leaves().get(2));
|
||||
ScoreAndDoc scorer3 = new ScoreAndDoc();
|
||||
leafCollector3.setScorer(scorer3);
|
||||
assertEquals(Math.nextUp(10f), scorer3.minCompetitiveScore, 0f);
|
||||
|
||||
scorer3.doc = 0;
|
||||
scorer3.score = 1f;
|
||||
leafCollector3.collect(0);
|
||||
assertEquals(10f, minValueChecker.get().score, 0f);
|
||||
assertEquals(Math.nextUp(10f), scorer3.minCompetitiveScore, 0f);
|
||||
|
||||
scorer.doc = 4;
|
||||
scorer.score = 11;
|
||||
leafCollector.collect(4);
|
||||
assertEquals(11f, minValueChecker.get().score, 0f);
|
||||
assertEquals(Math.nextUp(11f), scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
|
||||
assertEquals(Math.nextUp(10f), scorer3.minCompetitiveScore, 0f);
|
||||
|
||||
scorer3.doc = 1;
|
||||
scorer3.score = 2f;
|
||||
leafCollector3.collect(1);
|
||||
assertEquals(minValueChecker.get().score, 11f, 0f);
|
||||
assertEquals(Math.nextUp(11f), scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
|
||||
assertEquals(Math.nextUp(11f), scorer3.minCompetitiveScore, 0f);
|
||||
|
||||
|
||||
TopDocs topDocs = manager.reduce(Arrays.asList(collector, collector2, collector3));
|
||||
assertEquals(11, topDocs.totalHits.value);
|
||||
assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testRandomMinCompetitiveScore() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
|
||||
int numDocs = atLeast(1000);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
int numAs = 1 + random().nextInt(5);
|
||||
int numBs = random().nextFloat() < 0.5f ? 0 : 1 + random().nextInt(5);
|
||||
int numCs = random().nextFloat() < 0.1f ? 0 : 1 + random().nextInt(5);
|
||||
Document doc = new Document();
|
||||
for (int j = 0; j < numAs; ++j) {
|
||||
doc.add(new StringField("f", "A", Field.Store.NO));
|
||||
}
|
||||
for (int j = 0; j < numBs; ++j) {
|
||||
doc.add(new StringField("f", "B", Field.Store.NO));
|
||||
}
|
||||
for (int j = 0; j < numCs; ++j) {
|
||||
doc.add(new StringField("f", "C", Field.Store.NO));
|
||||
}
|
||||
w.addDocument(doc);
|
||||
}
|
||||
IndexReader indexReader = w.getReader();
|
||||
w.close();
|
||||
Query[] queries = new Query[]{
|
||||
new TermQuery(new Term("f", "A")),
|
||||
new TermQuery(new Term("f", "B")),
|
||||
new TermQuery(new Term("f", "C")),
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("f", "A")), BooleanClause.Occur.MUST)
|
||||
.add(new TermQuery(new Term("f", "B")), BooleanClause.Occur.SHOULD)
|
||||
.build()
|
||||
};
|
||||
for (Query query : queries) {
|
||||
TopDocsCollector collector = doSearchWithThreshold(5, 0, query, indexReader);
|
||||
TopDocs tdc = doConcurrentSearchWithThreshold(5, 0, query, indexReader);
|
||||
TopDocs tdc2 = collector.topDocs();
|
||||
|
||||
assertTrue(tdc.totalHits.value > 0);
|
||||
assertTrue(tdc2.totalHits.value > 0);
|
||||
CheckHits.checkEqual(query, tdc.scoreDocs, tdc2.scoreDocs);
|
||||
}
|
||||
|
||||
indexReader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testRealisticConcurrentMinimumScore() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
|
||||
try (LineFileDocs docs = new LineFileDocs(random())) {
|
||||
|
@ -432,8 +599,8 @@ public class TestTopDocsCollector extends LuceneTestCase {
|
|||
BytesRef term = BytesRef.deepCopyOf(termsEnum.term());
|
||||
Query query = new TermQuery(new Term("body", term));
|
||||
|
||||
TopDocsCollector collector = doSearchWithThreshold(5, 10);
|
||||
TopDocs tdc = doConcurrentSearchWithThreshold(5, 10, reader);
|
||||
TopDocsCollector collector = doSearchWithThreshold(5, 0, query, reader);
|
||||
TopDocs tdc = doConcurrentSearchWithThreshold(5, 0, query, reader);
|
||||
TopDocs tdc2 = collector.topDocs();
|
||||
|
||||
CheckHits.checkEqual(query, tdc.scoreDocs, tdc2.scoreDocs);
|
||||
|
@ -443,5 +610,4 @@ public class TestTopDocsCollector extends LuceneTestCase {
|
|||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.util.concurrent.ThreadPoolExecutor;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
|
@ -72,6 +73,31 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
dir.close();
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
private TopFieldCollector doSearchWithThreshold(int numResults, int thresHold, Query q, Sort sort, IndexReader indexReader) throws IOException {
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
TopFieldCollector tdc = TopFieldCollector.create(sort, numResults, thresHold);
|
||||
searcher.search(q, tdc);
|
||||
return tdc;
|
||||
}
|
||||
|
||||
private TopDocs doConcurrentSearchWithThreshold(int numResults, int threshold, Query q, Sort sort, IndexReader indexReader) throws IOException {
|
||||
ExecutorService service = new ThreadPoolExecutor(4, 4, 0L, TimeUnit.MILLISECONDS,
|
||||
new LinkedBlockingQueue<Runnable>(),
|
||||
new NamedThreadFactory("TestTopDocsCollector"));
|
||||
try {
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader, service);
|
||||
|
||||
CollectorManager collectorManager = TopFieldCollector.createSharedManager(sort, numResults,
|
||||
null, threshold);
|
||||
|
||||
TopDocs tdc = (TopDocs) searcher.search(q, collectorManager);
|
||||
|
||||
return tdc;
|
||||
} finally {
|
||||
service.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
public void testSortWithoutFillFields() throws Exception {
|
||||
|
||||
|
@ -495,4 +521,173 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testConcurrentMinScore() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));
|
||||
Document doc = new Document();
|
||||
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
|
||||
w.flush();
|
||||
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc, doc));
|
||||
w.flush();
|
||||
w.addDocuments(Arrays.asList(doc, doc));
|
||||
w.flush();
|
||||
IndexReader reader = DirectoryReader.open(w);
|
||||
assertEquals(3, reader.leaves().size());
|
||||
w.close();
|
||||
|
||||
Sort sort = new Sort(SortField.FIELD_SCORE, SortField.FIELD_DOC);
|
||||
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
|
||||
TopFieldCollector.createSharedManager(sort, 2, null, 0);
|
||||
TopFieldCollector collector = manager.newCollector();
|
||||
TopFieldCollector collector2 = manager.newCollector();
|
||||
assertTrue(collector.minScoreAcc == collector2.minScoreAcc);
|
||||
MaxScoreAccumulator minValueChecker = collector.minScoreAcc;
|
||||
// force the check of the global minimum score on every round
|
||||
minValueChecker.modInterval = 0;
|
||||
|
||||
ScoreAndDoc scorer = new ScoreAndDoc();
|
||||
ScoreAndDoc scorer2 = new ScoreAndDoc();
|
||||
|
||||
LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
|
||||
leafCollector.setScorer(scorer);
|
||||
LeafCollector leafCollector2 = collector2.getLeafCollector(reader.leaves().get(1));
|
||||
leafCollector2.setScorer(scorer2);
|
||||
|
||||
scorer.doc = 0;
|
||||
scorer.score = 3;
|
||||
leafCollector.collect(0);
|
||||
assertNull(minValueChecker.get());
|
||||
assertNull(scorer.minCompetitiveScore);
|
||||
|
||||
scorer2.doc = 0;
|
||||
scorer2.score = 6;
|
||||
leafCollector2.collect(0);
|
||||
assertNull(minValueChecker.get());
|
||||
assertNull(scorer2.minCompetitiveScore);
|
||||
|
||||
scorer.doc = 1;
|
||||
scorer.score = 2;
|
||||
leafCollector.collect(1);
|
||||
assertEquals(2f, minValueChecker.get().score, 0f);
|
||||
assertEquals(2f, scorer.minCompetitiveScore, 0f);
|
||||
assertNull(scorer2.minCompetitiveScore);
|
||||
|
||||
scorer2.doc = 1;
|
||||
scorer2.score = 9;
|
||||
leafCollector2.collect(1);
|
||||
assertEquals(6f, minValueChecker.get().score, 0f);
|
||||
assertEquals(2f, scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(6f, scorer2.minCompetitiveScore, 0f);
|
||||
|
||||
scorer2.doc = 2;
|
||||
scorer2.score = 7;
|
||||
leafCollector2.collect(2);
|
||||
assertEquals(7f, minValueChecker.get().score, 0f);
|
||||
assertEquals(2f, scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(7f, scorer2.minCompetitiveScore, 0f);
|
||||
|
||||
scorer2.doc = 3;
|
||||
scorer2.score = 1;
|
||||
leafCollector2.collect(3);
|
||||
assertEquals(7f, minValueChecker.get().score, 0f);
|
||||
assertEquals(2f, scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(7f, scorer2.minCompetitiveScore, 0f);
|
||||
|
||||
scorer.doc = 2;
|
||||
scorer.score = 10;
|
||||
leafCollector.collect(2);
|
||||
assertEquals(7f, minValueChecker.get().score, 0f);
|
||||
assertEquals(7f, scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(7f, scorer2.minCompetitiveScore, 0f);
|
||||
|
||||
scorer.doc = 3;
|
||||
scorer.score = 11;
|
||||
leafCollector.collect(3);
|
||||
assertEquals(10f, minValueChecker.get().score, 0f);
|
||||
assertEquals(10f, scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(7f, scorer2.minCompetitiveScore, 0f);
|
||||
|
||||
TopFieldCollector collector3 = manager.newCollector();
|
||||
LeafCollector leafCollector3 = collector3.getLeafCollector(reader.leaves().get(2));
|
||||
ScoreAndDoc scorer3 = new ScoreAndDoc();
|
||||
leafCollector3.setScorer(scorer3);
|
||||
assertEquals(10f, scorer3.minCompetitiveScore, 0f);
|
||||
|
||||
scorer3.doc = 0;
|
||||
scorer3.score = 1f;
|
||||
leafCollector3.collect(0);
|
||||
assertEquals(10f, minValueChecker.get().score, 0f);
|
||||
assertEquals(10f, scorer3.minCompetitiveScore, 0f);
|
||||
|
||||
scorer.doc = 4;
|
||||
scorer.score = 11;
|
||||
leafCollector.collect(4);
|
||||
assertEquals(11f, minValueChecker.get().score, 0f);
|
||||
assertEquals(11f, scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(7f, scorer2.minCompetitiveScore, 0f);
|
||||
assertEquals(10f, scorer3.minCompetitiveScore, 0f);
|
||||
|
||||
scorer3.doc = 1;
|
||||
scorer3.score = 2f;
|
||||
leafCollector3.collect(1);
|
||||
assertEquals(11f, minValueChecker.get().score, 0f);
|
||||
assertEquals(11f, scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(7f, scorer2.minCompetitiveScore, 0f);
|
||||
assertEquals(11f, scorer3.minCompetitiveScore, 0f);
|
||||
|
||||
|
||||
TopFieldDocs topDocs = manager.reduce(Arrays.asList(collector, collector2, collector3));
|
||||
assertEquals(11, topDocs.totalHits.value);
|
||||
assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testRandomMinCompetitiveScore() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
|
||||
int numDocs = atLeast(1000);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
int numAs = 1 + random().nextInt(5);
|
||||
int numBs = random().nextFloat() < 0.5f ? 0 : 1 + random().nextInt(5);
|
||||
int numCs = random().nextFloat() < 0.1f ? 0 : 1 + random().nextInt(5);
|
||||
Document doc = new Document();
|
||||
for (int j = 0; j < numAs; ++j) {
|
||||
doc.add(new StringField("f", "A", Field.Store.NO));
|
||||
}
|
||||
for (int j = 0; j < numBs; ++j) {
|
||||
doc.add(new StringField("f", "B", Field.Store.NO));
|
||||
}
|
||||
for (int j = 0; j < numCs; ++j) {
|
||||
doc.add(new StringField("f", "C", Field.Store.NO));
|
||||
}
|
||||
w.addDocument(doc);
|
||||
}
|
||||
IndexReader indexReader = w.getReader();
|
||||
w.close();
|
||||
Query[] queries = new Query[]{
|
||||
new TermQuery(new Term("f", "A")),
|
||||
new TermQuery(new Term("f", "B")),
|
||||
new TermQuery(new Term("f", "C")),
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("f", "A")), BooleanClause.Occur.MUST)
|
||||
.add(new TermQuery(new Term("f", "B")), BooleanClause.Occur.SHOULD)
|
||||
.build()
|
||||
};
|
||||
for (Query query : queries) {
|
||||
Sort sort = new Sort(new SortField[]{SortField.FIELD_SCORE, SortField.FIELD_DOC});
|
||||
TopFieldCollector fieldCollector = doSearchWithThreshold(5, 0, query, sort, indexReader);
|
||||
TopDocs tdc = doConcurrentSearchWithThreshold(5, 0, query, sort, indexReader);
|
||||
TopDocs tdc2 = fieldCollector.topDocs();
|
||||
|
||||
assertTrue(tdc.totalHits.value > 0);
|
||||
assertTrue(tdc2.totalHits.value > 0);
|
||||
CheckHits.checkEqual(query, tdc.scoreDocs, tdc2.scoreDocs);
|
||||
}
|
||||
|
||||
indexReader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue