mirror of https://github.com/apache/lucene.git
LUCENE-10207: TermInSetQuery now provides a ScorerSupplier with cost estimation for use in IndexOrDocValuesQuery (#1058)
This commit is contained in:
parent
0462a0ad73
commit
680f21dca5
|
@ -101,6 +101,9 @@ Improvements
|
||||||
---------------------
|
---------------------
|
||||||
* LUCENE-10592: Build HNSW Graph on indexing. (Mayya Sharipova, Adrien Grand, Julie Tibshirani)
|
* LUCENE-10592: Build HNSW Graph on indexing. (Mayya Sharipova, Adrien Grand, Julie Tibshirani)
|
||||||
|
|
||||||
|
* LUCENE-10207: TermInSetQuery can now provide a ScorerSupplier with cost estimation, making it
|
||||||
|
usable in IndexOrDocValuesQuery. (Greg Miller)
|
||||||
|
|
||||||
* GITHUB#11715: Add Integer awareness to RamUsageEstimator.sizeOf (Mike Drob)
|
* GITHUB#11715: Add Integer awareness to RamUsageEstimator.sizeOf (Mike Drob)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
|
@ -354,15 +354,69 @@ public class TermInSetQuery extends Query implements Accountable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
||||||
final WeightOrDocIdSet weightOrBitSet = rewrite(context);
|
Terms indexTerms = context.reader().terms(field);
|
||||||
if (weightOrBitSet == null) {
|
if (indexTerms == null) {
|
||||||
return null;
|
return null;
|
||||||
} else if (weightOrBitSet.weight != null) {
|
|
||||||
return weightOrBitSet.weight.scorer(context);
|
|
||||||
} else {
|
|
||||||
return scorer(weightOrBitSet.set);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cost estimation reasoning is:
|
||||||
|
// 1. Assume every query term matches at least one document (queryTermsCount).
|
||||||
|
// 2. Determine the total number of docs beyond the first one for each term.
|
||||||
|
// That count provides a ceiling on the number of extra docs that could match beyond
|
||||||
|
// that first one. (We omit the first since it's already been counted in #1).
|
||||||
|
// This approach still provides correct worst-case cost in general, but provides tighter
|
||||||
|
// estimates for primary-key-like fields. See: LUCENE-10207
|
||||||
|
|
||||||
|
// TODO: This cost estimation may grossly overestimate since we have no index statistics
|
||||||
|
// for the specific query terms. While it's nice to avoid the cost of intersecting the
|
||||||
|
// query terms with the index, it could be beneficial to do that work and get better
|
||||||
|
// cost estimates.
|
||||||
|
final long cost;
|
||||||
|
final long queryTermsCount = termData.size();
|
||||||
|
long potentialExtraCost = indexTerms.getSumDocFreq();
|
||||||
|
final long indexedTermCount = indexTerms.size();
|
||||||
|
if (indexedTermCount != -1) {
|
||||||
|
potentialExtraCost -= indexedTermCount;
|
||||||
|
}
|
||||||
|
cost = queryTermsCount + potentialExtraCost;
|
||||||
|
|
||||||
|
final Weight weight = this;
|
||||||
|
return new ScorerSupplier() {
|
||||||
|
@Override
|
||||||
|
public Scorer get(long leadCost) throws IOException {
|
||||||
|
WeightOrDocIdSet weightOrDocIdSet = rewrite(context);
|
||||||
|
if (weightOrDocIdSet == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
final Scorer scorer;
|
||||||
|
if (weightOrDocIdSet.weight != null) {
|
||||||
|
scorer = weightOrDocIdSet.weight.scorer(context);
|
||||||
|
} else {
|
||||||
|
scorer = scorer(weightOrDocIdSet.set);
|
||||||
|
}
|
||||||
|
|
||||||
|
return Objects.requireNonNullElseGet(
|
||||||
|
scorer,
|
||||||
|
() ->
|
||||||
|
new ConstantScoreScorer(weight, score(), scoreMode, DocIdSetIterator.empty()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the cost estimate captured in the enclosing method's {@code cost} local, which is
 * computed once before this supplier is created (no index access happens here).
 */
@Override
|
||||||
|
public long cost() {
|
||||||
|
return cost;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a scorer for {@code context} by delegating to {@code scorerSupplier(context)}.
 *
 * <p>Returns {@code null} when no supplier is available for this leaf; otherwise returns the
 * result of {@code supplier.get(Long.MAX_VALUE)}.
 *
 * @throws IOException if obtaining the supplier or its scorer fails
 */
@Override
|
||||||
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
|
final ScorerSupplier supplier = scorerSupplier(context);
|
||||||
|
if (supplier == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return supplier.get(Long.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue