mirror of https://github.com/apache/lucene.git
LUCENE-8602: Share TermsEnum if possible while applying DV updates
Today we pull a new terms enum for every DV update we apply, even though the field stays the same between updates in the common case. Sharing the terms enum while the field is unchanged avoids that repeated work: benchmarking this on a larger term dictionary with a significant number of updates shows a 2x improvement in performance.
This commit is contained in:
parent
3147c131e0
commit
d08e2d49d3
|
@ -468,7 +468,7 @@ final class FrozenBufferedUpdates {
|
|||
long delGen,
|
||||
boolean segmentPrivateDeletes) throws IOException {
|
||||
|
||||
TermsEnum termsEnum;
|
||||
TermsEnum termsEnum = null;
|
||||
PostingsEnum postingsEnum = null;
|
||||
|
||||
// TODO: we can process the updates per DV field, from last to first so that
|
||||
|
@ -492,11 +492,14 @@ final class FrozenBufferedUpdates {
|
|||
boolean isNumeric = value.isNumeric();
|
||||
FieldUpdatesBuffer.BufferedUpdateIterator iterator = value.iterator();
|
||||
FieldUpdatesBuffer.BufferedUpdate bufferedUpdate;
|
||||
String previousField = null;
|
||||
while ((bufferedUpdate = iterator.next()) != null) {
|
||||
Terms terms = segState.reader.terms(bufferedUpdate.termField);
|
||||
if (terms != null) {
|
||||
termsEnum = terms.iterator();
|
||||
} else {
|
||||
if (previousField == null || previousField.equals(bufferedUpdate.termField) == false) {
|
||||
previousField = bufferedUpdate.termField;
|
||||
Terms terms = segState.reader.terms(previousField);
|
||||
termsEnum = terms == null ? null : terms.iterator();
|
||||
}
|
||||
if (termsEnum == null) {
|
||||
// no terms in this segment for this field
|
||||
continue;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue