Don't count hits via the collector if the hit count can be computed from index stats. (#33701)

We were already doing this when sorting by field; it is now also done when
sorting by score. As is, this change will speed up top-k `term` queries. It
could work for `match_all` queries as well once we implement the
`setMinCompetitiveScore` API on their Scorer.
This commit is contained in:
Adrien Grand 2018-09-14 14:59:16 +02:00 committed by GitHub
parent 2282150f34
commit 4f68104865
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 35 additions and 39 deletions

View File

@ -174,6 +174,16 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext {
} }
abstract static class SimpleTopDocsCollectorContext extends TopDocsCollectorContext { abstract static class SimpleTopDocsCollectorContext extends TopDocsCollectorContext {
/**
 * Builds the top-docs collector matching the requested sort mode.
 *
 * @param sortAndFormats    sort specification; when {@code null} a {@link TopScoreDocCollector} is created,
 *                          otherwise a {@link TopFieldCollector} over {@code sortAndFormats.sort}
 * @param numHits           number of top hits to collect, forwarded to the collector factory
 * @param searchAfter       optional document to resume after; it is cast to {@link FieldDoc} when
 *                          {@code sortAndFormats} is non-null, so it must be a {@code FieldDoc} (or {@code null}) then
 * @param hitCountThreshold forwarded unchanged to the collector factory
 * @return the newly created collector
 */
private static TopDocsCollector<?> createCollector(@Nullable SortAndFormats sortAndFormats, int numHits,
                                                   @Nullable ScoreDoc searchAfter, int hitCountThreshold) {
    if (sortAndFormats != null) {
        // field sort: the factory requires the search-after doc typed as a FieldDoc
        final FieldDoc after = (FieldDoc) searchAfter;
        return TopFieldCollector.create(sortAndFormats.sort, numHits, after, hitCountThreshold);
    }
    // no explicit sort: rank by score
    return TopScoreDocCollector.create(numHits, searchAfter, hitCountThreshold);
}
private final @Nullable SortAndFormats sortAndFormats; private final @Nullable SortAndFormats sortAndFormats;
private final Collector collector; private final Collector collector;
private final Supplier<TotalHits> totalHitsSupplier; private final Supplier<TotalHits> totalHitsSupplier;
@ -201,12 +211,27 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext {
boolean hasFilterCollector) throws IOException { boolean hasFilterCollector) throws IOException {
super(REASON_SEARCH_TOP_HITS, numHits); super(REASON_SEARCH_TOP_HITS, numHits);
this.sortAndFormats = sortAndFormats; this.sortAndFormats = sortAndFormats;
// implicit total hit counts are valid only when there is no filter collector in the chain
final int hitCount = hasFilterCollector ? -1 : shortcutTotalHitCount(reader, query);
final TopDocsCollector<?> topDocsCollector;
if (hitCount == -1 && trackTotalHits) {
topDocsCollector = createCollector(sortAndFormats, numHits, searchAfter, Integer.MAX_VALUE);
topDocsSupplier = new CachedSupplier<>(topDocsCollector::topDocs);
totalHitsSupplier = () -> topDocsSupplier.get().totalHits;
} else {
topDocsCollector = createCollector(sortAndFormats, numHits, searchAfter, 1); // don't compute hit counts via the collector
topDocsSupplier = new CachedSupplier<>(topDocsCollector::topDocs);
if (hitCount == -1) {
assert trackTotalHits == false;
totalHitsSupplier = () -> new TotalHits(0, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO);
} else {
totalHitsSupplier = () -> new TotalHits(hitCount, TotalHits.Relation.EQUAL_TO);
}
}
MaxScoreCollector maxScoreCollector = null;
if (sortAndFormats == null) { if (sortAndFormats == null) {
final TopDocsCollector<?> topDocsCollector = TopScoreDocCollector.create(numHits, searchAfter, Integer.MAX_VALUE); maxScoreSupplier = () -> {
this.collector = topDocsCollector;
this.topDocsSupplier = new CachedSupplier<>(topDocsCollector::topDocs);
this.totalHitsSupplier = () -> topDocsSupplier.get().totalHits;
this.maxScoreSupplier = () -> {
TopDocs topDocs = topDocsSupplier.get(); TopDocs topDocs = topDocsSupplier.get();
if (topDocs.scoreDocs.length == 0) { if (topDocs.scoreDocs.length == 0) {
return Float.NaN; return Float.NaN;
@ -214,42 +239,13 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext {
return topDocs.scoreDocs[0].score; return topDocs.scoreDocs[0].score;
} }
}; };
} else { } else if (trackMaxScore) {
/**
* We explicitly don't track total hits in the topdocs collector, it can early terminate
* if the sort matches the index sort.
*/
final TopDocsCollector<?> topDocsCollector = TopFieldCollector.create(sortAndFormats.sort, numHits,
(FieldDoc) searchAfter, 1);
this.topDocsSupplier = new CachedSupplier<>(topDocsCollector::topDocs);
TotalHitCountCollector hitCountCollector = null;
if (trackTotalHits) {
// implicit total hit counts are valid only when there is no filter collector in the chain
int count = hasFilterCollector ? -1 : shortcutTotalHitCount(reader, query);
if (count != -1) {
// we can extract the total count from the shard statistics directly
this.totalHitsSupplier = () -> new TotalHits(count, TotalHits.Relation.EQUAL_TO);
} else {
// wrap a collector that counts the total number of hits even
// if the top docs collector terminates early
final TotalHitCountCollector countingCollector = new TotalHitCountCollector();
hitCountCollector = countingCollector;
this.totalHitsSupplier = () -> new TotalHits(countingCollector.getTotalHits(), TotalHits.Relation.EQUAL_TO);
}
} else {
// total hit count is not needed
// for bwc hit count is set to 0, it will be converted to -1 by the coordinating node
this.totalHitsSupplier = () -> new TotalHits(0, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO);
}
MaxScoreCollector maxScoreCollector = null;
if (trackMaxScore) {
maxScoreCollector = new MaxScoreCollector(); maxScoreCollector = new MaxScoreCollector();
maxScoreSupplier = maxScoreCollector::getMaxScore; maxScoreSupplier = maxScoreCollector::getMaxScore;
} else { } else {
maxScoreSupplier = () -> Float.NaN; maxScoreSupplier = () -> Float.NaN;
} }
collector = MultiCollector.wrap(topDocsCollector, hitCountCollector, maxScoreCollector); this.collector = MultiCollector.wrap(topDocsCollector, maxScoreCollector);
}
} }
@Override @Override