LUCENE-10110: MultiCollector should conditionally wrap single leaf collector (#303)

MultiCollector should wrap a single leaf collector that wants to skip low-scoring hits
 when the combined score mode doesn't allow it.
This commit is contained in:
Jim Ferenczi 2021-09-20 07:26:51 +02:00 committed by GitHub
parent 6c1e5920d8
commit ccf0d5404d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 87 additions and 8 deletions

View File

@ -402,7 +402,9 @@ Optimizations
Bug Fixes
---------------------
(No changes)
* LUCENE-10110: MultiCollector now handles a single leaf collector that wants to skip low-scoring hits
when the combined score mode doesn't allow it. (Jim Ferenczi)
Build
---------------------

View File

@ -26,6 +26,11 @@ import org.apache.lucene.index.LeafReaderContext;
* A {@link Collector} which allows running a search with several {@link Collector}s. It offers a
* static {@link #wrap} method which accepts a list of collectors and wraps them with {@link
* MultiCollector}, while filtering out the <code>null</code> ones.
*
* <p><b>NOTE:</b> When mixing collectors that want to skip low-scoring hits ({@link
* ScoreMode#TOP_SCORES}) with ones that need to see all hits, such as mixing {@link
* TopScoreDocCollector} and {@link TotalHitCountCollector}, it should be faster to run the query
* twice, once for each collector, rather than using this wrapper on a single search.
*/
public class MultiCollector implements Collector {
@ -112,6 +117,7 @@ public class MultiCollector implements Collector {
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
final List<LeafCollector> leafCollectors = new ArrayList<>(collectors.length);
ScoreMode leafScoreMode = null;
for (Collector collector : collectors) {
final LeafCollector leafCollector;
try {
@ -122,16 +128,24 @@ public class MultiCollector implements Collector {
// this leaf collector does not need this segment
continue;
}
if (leafScoreMode == null) {
leafScoreMode = collector.scoreMode();
} else if (leafScoreMode != collector.scoreMode()) {
leafScoreMode = ScoreMode.COMPLETE;
}
leafCollectors.add(leafCollector);
}
switch (leafCollectors.size()) {
case 0:
throw new CollectionTerminatedException();
case 1:
if (leafCollectors.isEmpty()) {
throw new CollectionTerminatedException();
} else {
// Wraps single leaf collector that wants to skip low-scoring hits (ScoreMode.TOP_SCORES)
// but the global score mode doesn't allow it.
if (leafCollectors.size() == 1
&& (scoreMode() == ScoreMode.TOP_SCORES || leafScoreMode != ScoreMode.TOP_SCORES)) {
return leafCollectors.get(0);
default:
return new MultiLeafCollector(
leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES);
}
return new MultiLeafCollector(
leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES);
}
}

View File

@ -220,6 +220,69 @@ public class TestMultiCollector extends LuceneTestCase {
dir.close();
}
/**
 * Verifies that a collector reporting {@code ScoreMode.TOP_SCORES} cannot reach {@code
 * setMinCompetitiveScore} on the underlying scorer once it has been combined, via {@code
 * MultiCollector.wrap}, with one to three {@code TerminateAfterCollector}s wrapping a {@code
 * TotalHitCountCollector}.
 *
 * <p>The scorer handed to the wrapped leaf collector throws an {@code AssertionError} from {@code
 * setMinCompetitiveScore}, so the test passes only if collecting a document never propagates
 * that call.
 */
public void testDisablesSetMinScoreWithEarlyTermination() throws IOException {
  // Single-segment index holding one empty document.
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig());
  writer.addDocument(new Document());
  IndexReader indexReader = DirectoryReader.open(writer);
  writer.close();

  // Scorer that fails the test as soon as anybody tries to raise the minimum competitive score.
  Scorable failingScorable =
      new Scorable() {
        @Override
        public int docID() {
          throw new UnsupportedOperationException();
        }

        @Override
        public float score() {
          return 0;
        }

        @Override
        public void setMinCompetitiveScore(float minScore) {
          throw new AssertionError();
        }
      };

  // Collector that asks to skip low-scoring hits and bumps the threshold on every collected doc.
  Collector skippingCollector =
      new SimpleCollector() {
        private Scorable currentScorer;
        float threshold = 0;

        @Override
        public ScoreMode scoreMode() {
          return ScoreMode.TOP_SCORES;
        }

        @Override
        public void setScorer(Scorable scorer) throws IOException {
          currentScorer = scorer;
        }

        @Override
        public void collect(int doc) throws IOException {
          threshold = Math.nextUp(threshold);
          currentScorer.setMinCompetitiveScore(threshold);
        }
      };

  // Try with 1, 2 and 3 additional collectors, in random positions relative to the first one.
  for (int extraCollectors = 1; extraCollectors <= 3; extraCollectors++) {
    List<Collector> combined = new ArrayList<>();
    combined.add(skippingCollector);
    int remaining = extraCollectors;
    while (remaining > 0) {
      // NOTE(review): TerminateAfterCollector is declared elsewhere in this test class;
      // presumably a limit of 0 makes it stop collecting immediately - confirm.
      combined.add(new TerminateAfterCollector(new TotalHitCountCollector(), 0));
      remaining--;
    }
    Collections.shuffle(combined, random());

    LeafCollector wrappedLeaf =
        MultiCollector.wrap(combined).getLeafCollector(indexReader.leaves().get(0));
    wrappedLeaf.setScorer(failingScorable);
    wrappedLeaf.collect(0); // no exception
  }

  indexReader.close();
  directory.close();
}
private static class DummyCollector extends SimpleCollector {
boolean collectCalled = false;