From b4e1fe4393b5b23a0de3658c15fbcaf29255b082 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 5 Dec 2018 21:58:30 +0100 Subject: [PATCH] LUCENE-8593: Specialize single value numeric DV updates The case when all values are the same on a numeric field update is common for soft_deletes. With the new infrastructure for buffering DV updates we can gain an easy win by specializing the applied updates if all values are the same. --- .../lucene/index/DocValuesFieldUpdates.java | 48 +++++++++++++++++++ .../lucene/index/FieldUpdatesBuffer.java | 12 +++++ .../lucene/index/FrozenBufferedUpdates.java | 16 +++++-- .../index/NumericDocValuesFieldUpdates.java | 20 ++++++++ .../lucene/index/TestFieldUpdatesBuffer.java | 7 +++ 5 files changed, 98 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java b/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java index 093a428e961..9bf91794fb6 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java @@ -425,4 +425,52 @@ abstract class DocValuesFieldUpdates implements Accountable { return hasValue; } } + + static abstract class SingleValueDocValuesFieldUpdates extends DocValuesFieldUpdates { + + protected SingleValueDocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) { + super(maxDoc, delGen, field, type); + } + + @Override + void add(int doc, long value) { + assert longValue() == value; + super.add(doc); + } + + @Override + void add(int doc, BytesRef value) { + assert binaryValue().equals(value); + super.add(doc); + } + + @Override + void add(int docId, Iterator iterator) { + throw new UnsupportedOperationException(); + } + + protected abstract BytesRef binaryValue(); + + protected abstract long longValue(); + + @Override + Iterator iterator() { + return new DocValuesFieldUpdates.AbstractIterator(size, docs, delGen) { 
+ @Override + protected void set(long idx) { + // nothing to do; + } + + @Override + long longValue() { + return SingleValueDocValuesFieldUpdates.this.longValue(); + } + + @Override + BytesRef binaryValue() { + return SingleValueDocValuesFieldUpdates.this.binaryValue(); + } + }; + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldUpdatesBuffer.java b/lucene/core/src/java/org/apache/lucene/index/FieldUpdatesBuffer.java index acfa88e4b1f..31a91bb1247 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FieldUpdatesBuffer.java +++ b/lucene/core/src/java/org/apache/lucene/index/FieldUpdatesBuffer.java @@ -183,6 +183,18 @@ final class FieldUpdatesBuffer { return isNumeric; } + boolean hasSingleValue() { + // we only do this optimization for numerics so far. + return isNumeric && numericValues.length == 1; + } + + long getNumericValue(int idx) { + if (hasValues != null && hasValues.get(idx) == false) { + return 0; + } + return numericValues[getArrayIndex(numericValues.length, idx)]; + } + /** * Struct like class that is used to iterate over all updates in this buffer */ diff --git a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java index 266db310057..94138c7bdf9 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java @@ -484,9 +484,10 @@ final class FrozenBufferedUpdates { long updateCount = 0; // We first write all our updates private, and only in the end publish to the ReadersAndUpdates */ - Map holder = new HashMap<>(); + final List resolvedUpdates = new ArrayList<>(); for (Map.Entry fieldUpdate : updates.entrySet()) { String updateField = fieldUpdate.getKey(); + DocValuesFieldUpdates dvUpdates = null; FieldUpdatesBuffer value = fieldUpdate.getValue(); boolean isNumeric = value.isNumeric(); FieldUpdatesBuffer.BufferedUpdateIterator iterator = 
value.iterator(); @@ -534,14 +535,19 @@ final class FrozenBufferedUpdates { if (termsEnum.seekExact(bufferedUpdate.termValue)) { // we don't need term frequencies for this postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); - DocValuesFieldUpdates dvUpdates = holder.get(updateField); if (dvUpdates == null) { if (isNumeric) { - dvUpdates = new NumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc()); + if (value.hasSingleValue()) { + dvUpdates = new NumericDocValuesFieldUpdates + .SingleValueNumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc(), + value.getNumericValue(0)); + } else { + dvUpdates = new NumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc()); + } } else { dvUpdates = new BinaryDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc()); } - holder.put(updateField, dvUpdates); + resolvedUpdates.add(dvUpdates); } final IntConsumer docIdConsumer; final DocValuesFieldUpdates update = dvUpdates; @@ -582,7 +588,7 @@ final class FrozenBufferedUpdates { } // now freeze & publish: - for (DocValuesFieldUpdates update : holder.values()) { + for (DocValuesFieldUpdates update : resolvedUpdates) { if (update.any()) { update.finish(); segState.rld.addDVUpdate(update); diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java index 626a61ed4f4..ebc196bb283 100644 --- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java @@ -111,4 +111,24 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates { + Long.BYTES + RamUsageEstimator.NUM_BYTES_OBJECT_REF; } + + static class SingleValueNumericDocValuesFieldUpdates extends SingleValueDocValuesFieldUpdates { + + private final long value; + + SingleValueNumericDocValuesFieldUpdates(long delGen, String 
field, int maxDoc, long value) { + super(maxDoc, delGen, field, DocValuesType.NUMERIC); + this.value = value; + } + + @Override + protected BytesRef binaryValue() { + throw new UnsupportedOperationException(); + } + + @Override + protected long longValue() { + return value; + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldUpdatesBuffer.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldUpdatesBuffer.java index ae4442fccd5..832c7cc7733 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestFieldUpdatesBuffer.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldUpdatesBuffer.java @@ -36,9 +36,13 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase { new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", 6); FieldUpdatesBuffer buffer = new FieldUpdatesBuffer(counter, update, 15); buffer.addUpdate(new Term("id", "10"), 6, 15); + assertTrue(buffer.hasSingleValue()); buffer.addUpdate(new Term("id", "8"), 12, 15); + assertFalse(buffer.hasSingleValue()); buffer.addUpdate(new Term("some_other_field", "8"), 13, 17); + assertFalse(buffer.hasSingleValue()); buffer.addUpdate(new Term("id", "8"), 12, 16); + assertFalse(buffer.hasSingleValue()); assertTrue(buffer.isNumeric()); FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator(); FieldUpdatesBuffer.BufferedUpdate value = iterator.next(); @@ -214,14 +218,17 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase { int count = 0; while ((value = iterator.next()) != null) { + long v = buffer.getNumericValue(count); randomUpdate = updates.get(count++); assertEquals(randomUpdate.term.bytes.utf8ToString(), value.termValue.utf8ToString()); assertEquals(randomUpdate.term.field, value.termField); assertEquals(randomUpdate.hasValue, value.hasValue); if (randomUpdate.hasValue) { assertEquals(randomUpdate.getValue(), value.numericValue); + assertEquals(v, value.numericValue); } else { assertEquals(0, value.numericValue); + 
assertEquals(0, v); } assertEquals(randomUpdate.docIDUpto, value.docUpTo); }