LUCENE-8602: Share TermsEnum if possible while applying DV updates

Today we pull a new terms enum for every DV update we apply, even though
the field usually stays the same from one update to the next. Benchmarking
this change on a larger term dictionary with a significant number of
updates shows a 2x improvement in performance.
This commit is contained in:
Simon Willnauer 2018-12-11 17:28:08 +01:00
parent 3147c131e0
commit d08e2d49d3
1 changed file with 8 additions and 5 deletions

View File

@ -468,7 +468,7 @@ final class FrozenBufferedUpdates {
long delGen,
boolean segmentPrivateDeletes) throws IOException {
TermsEnum termsEnum;
TermsEnum termsEnum = null;
PostingsEnum postingsEnum = null;
// TODO: we can process the updates per DV field, from last to first so that
@ -492,11 +492,14 @@ final class FrozenBufferedUpdates {
boolean isNumeric = value.isNumeric();
FieldUpdatesBuffer.BufferedUpdateIterator iterator = value.iterator();
FieldUpdatesBuffer.BufferedUpdate bufferedUpdate;
String previousField = null;
while ((bufferedUpdate = iterator.next()) != null) {
Terms terms = segState.reader.terms(bufferedUpdate.termField);
if (terms != null) {
termsEnum = terms.iterator();
} else {
if (previousField == null || previousField.equals(bufferedUpdate.termField) == false) {
previousField = bufferedUpdate.termField;
Terms terms = segState.reader.terms(previousField);
termsEnum = terms == null ? null : terms.iterator();
}
if (termsEnum == null) {
// no terms in this segment for this field
continue;
}