mirror of https://github.com/apache/lucene.git
LUCENE-10110: MultiCollector should conditionally wrap single leaf collector (#303)
MultiCollector should wrap a single leaf collector that wants to skip low-scoring hits when the combined score mode doesn't allow it.
This commit is contained in:
parent
6c1e5920d8
commit
ccf0d5404d
|
@ -402,7 +402,9 @@ Optimizations
|
|||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
(No changes)
|
||||
|
||||
* LUCENE-10110: MultiCollector now handles a single leaf collector that wants to skip low-scoring hits
|
||||
when the combined score mode doesn't allow it. (Jim Ferenczi)
|
||||
|
||||
Build
|
||||
---------------------
|
||||
|
|
|
@ -26,6 +26,11 @@ import org.apache.lucene.index.LeafReaderContext;
|
|||
* A {@link Collector} which allows running a search with several {@link Collector}s. It offers a
|
||||
* static {@link #wrap} method which accepts a list of collectors and wraps them with {@link
|
||||
* MultiCollector}, while filtering out the <code>null</code> ones.
|
||||
*
|
||||
* <p><b>NOTE:</b> When mixing collectors that want to skip low-scoring hits ({@link
|
||||
* ScoreMode#TOP_SCORES}) with ones that need to see all hits, such as mixing {@link
|
||||
* TopScoreDocCollector} and {@link TotalHitCountCollector}, it should be faster to run the query
|
||||
* twice, once for each collector, rather than using this wrapper on a single search.
|
||||
*/
|
||||
public class MultiCollector implements Collector {
|
||||
|
||||
|
@ -112,6 +117,7 @@ public class MultiCollector implements Collector {
|
|||
@Override
|
||||
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
|
||||
final List<LeafCollector> leafCollectors = new ArrayList<>(collectors.length);
|
||||
ScoreMode leafScoreMode = null;
|
||||
for (Collector collector : collectors) {
|
||||
final LeafCollector leafCollector;
|
||||
try {
|
||||
|
@ -122,16 +128,24 @@ public class MultiCollector implements Collector {
|
|||
// this leaf collector does not need this segment
|
||||
continue;
|
||||
}
|
||||
if (leafScoreMode == null) {
|
||||
leafScoreMode = collector.scoreMode();
|
||||
} else if (leafScoreMode != collector.scoreMode()) {
|
||||
leafScoreMode = ScoreMode.COMPLETE;
|
||||
}
|
||||
leafCollectors.add(leafCollector);
|
||||
}
|
||||
switch (leafCollectors.size()) {
|
||||
case 0:
|
||||
throw new CollectionTerminatedException();
|
||||
case 1:
|
||||
if (leafCollectors.isEmpty()) {
|
||||
throw new CollectionTerminatedException();
|
||||
} else {
|
||||
// Wraps single leaf collector that wants to skip low-scoring hits (ScoreMode.TOP_SCORES)
|
||||
// but the global score mode doesn't allow it.
|
||||
if (leafCollectors.size() == 1
|
||||
&& (scoreMode() == ScoreMode.TOP_SCORES || leafScoreMode != ScoreMode.TOP_SCORES)) {
|
||||
return leafCollectors.get(0);
|
||||
default:
|
||||
return new MultiLeafCollector(
|
||||
leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES);
|
||||
}
|
||||
return new MultiLeafCollector(
|
||||
leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -220,6 +220,69 @@ public class TestMultiCollector extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testDisablesSetMinScoreWithEarlyTermination() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
|
||||
w.addDocument(new Document());
|
||||
IndexReader reader = DirectoryReader.open(w);
|
||||
w.close();
|
||||
|
||||
Scorable scorer =
|
||||
new Scorable() {
|
||||
@Override
|
||||
public int docID() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setMinCompetitiveScore(float minScore) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
};
|
||||
|
||||
Collector collector =
|
||||
new SimpleCollector() {
|
||||
private Scorable scorer;
|
||||
float minScore = 0;
|
||||
|
||||
@Override
|
||||
public ScoreMode scoreMode() {
|
||||
return ScoreMode.TOP_SCORES;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorable scorer) throws IOException {
|
||||
this.scorer = scorer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
minScore = Math.nextUp(minScore);
|
||||
scorer.setMinCompetitiveScore(minScore);
|
||||
}
|
||||
};
|
||||
for (int numCol = 1; numCol < 4; numCol++) {
|
||||
List<Collector> cols = new ArrayList<>();
|
||||
cols.add(collector);
|
||||
for (int col = 0; col < numCol; col++) {
|
||||
cols.add(new TerminateAfterCollector(new TotalHitCountCollector(), 0));
|
||||
}
|
||||
Collections.shuffle(cols, random());
|
||||
Collector multiCollector = MultiCollector.wrap(cols);
|
||||
LeafCollector leafCollector = multiCollector.getLeafCollector(reader.leaves().get(0));
|
||||
leafCollector.setScorer(scorer);
|
||||
leafCollector.collect(0); // no exception
|
||||
}
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private static class DummyCollector extends SimpleCollector {
|
||||
|
||||
boolean collectCalled = false;
|
||||
|
|
Loading…
Reference in New Issue