Better PostingsEnum reuse in MultiTermQueryConstantScoreBlendedWrapper (#12179)

This commit is contained in:
Greg Miller 2023-03-06 09:09:52 -08:00 committed by GitHub
parent 3bd06b1cb9
commit b4f969c197
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 7 additions and 4 deletions

View File

@@ -153,6 +153,8 @@ Optimizations
* GITHUB#12139: Faster indexing of string fields. (Adrien Grand)
+ * GITHUB#12179: Better PostingsEnum reuse in MultiTermQueryConstantScoreBlendedWrapper. (Greg Miller)
Bug Fixes
---------------------

View File

@@ -64,21 +64,22 @@ final class MultiTermQueryConstantScoreBlendedWrapper<Q extends MultiTermQuery>
};
// Handle the already-collected terms:
+ PostingsEnum reuse = null;
if (collectedTerms.isEmpty() == false) {
TermsEnum termsEnum2 = terms.iterator();
for (TermAndState t : collectedTerms) {
termsEnum2.seekExact(t.term, t.state);
- PostingsEnum postings = termsEnum2.postings(null, PostingsEnum.NONE);
+ reuse = termsEnum2.postings(reuse, PostingsEnum.NONE);
if (t.docFreq <= POSTINGS_PRE_PROCESS_THRESHOLD) {
- otherTerms.add(postings);
+ otherTerms.add(reuse);
} else {
- highFrequencyTerms.add(postings);
+ highFrequencyTerms.add(reuse);
+ reuse = null; // can't reuse since we haven't processed the postings
}
}
}
// Then collect remaining terms:
- PostingsEnum reuse = null;
do {
reuse = termsEnum.postings(reuse, PostingsEnum.NONE);
// If a term contains all docs with a value for the specified field, we can discard the