Optimize flush of doc-value fields that are effectively single-valued when an index sort is configured. (#12037)

This iterates on #399 to also optimize the case where an index sort is
configured. When the NYC taxis benchmark was cut over to the new numeric
fields,
[flush times](http://people.apache.org/~mikemccand/lucenebench/sparseResults.html#flush_times)
stayed mostly the same when index sorting was disabled and increased by 7-8%
when index sorting was enabled. I expect this change to address that slowdown.
This commit is contained in:
Adrien Grand 2022-12-27 11:12:56 +01:00 committed by GitHub
parent ddd63d2da3
commit 6f477e5831
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 114 additions and 79 deletions

View File

@ -20,6 +20,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet; import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Counter; import org.apache.lucene.util.Counter;
@ -99,30 +100,38 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
if (finalValues == null) { if (finalValues == null) {
finalValues = pending.build(); finalValues = pending.build();
} }
dvConsumer.addNumericField(
fieldInfo, getDocValuesProducer(fieldInfo, finalValues, docsWithField, sortMap));
}
static DocValuesProducer getDocValuesProducer(
FieldInfo writerFieldInfo,
PackedLongValues values,
DocsWithFieldSet docsWithField,
Sorter.DocMap sortMap)
throws IOException {
final NumericDVs sorted; final NumericDVs sorted;
if (sortMap != null) { if (sortMap != null) {
NumericDocValues oldValues = NumericDocValues oldValues = new BufferedNumericDocValues(values, docsWithField.iterator());
new BufferedNumericDocValues(finalValues, docsWithField.iterator()); sorted = sortDocValues(sortMap.size(), sortMap, oldValues);
sorted = sortDocValues(state.segmentInfo.maxDoc(), sortMap, oldValues);
} else { } else {
sorted = null; sorted = null;
} }
dvConsumer.addNumericField( return new EmptyDocValuesProducer() {
fieldInfo,
new EmptyDocValuesProducer() {
@Override @Override
public NumericDocValues getNumeric(FieldInfo fieldInfo) { public NumericDocValues getNumeric(FieldInfo fieldInfo) {
if (fieldInfo != NumericDocValuesWriter.this.fieldInfo) { if (fieldInfo != writerFieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo"); throw new IllegalArgumentException("wrong fieldInfo");
} }
if (sorted == null) { if (sorted == null) {
return new BufferedNumericDocValues(finalValues, docsWithField.iterator()); return new BufferedNumericDocValues(values, docsWithField.iterator());
} else { } else {
return new SortingNumericDocValues(sorted); return new SortingNumericDocValues(sorted);
} }
} }
}); };
} }
// iterates over the values we have in ram // iterates over the values we have in ram

View File

@ -22,6 +22,7 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -109,24 +110,28 @@ class SortedDocValuesWriter extends DocValuesWriter<SortedDocValues> {
bytesUsed = newBytesUsed; bytesUsed = newBytesUsed;
} }
@Override private void finish() {
SortedDocValues getDocValues() {
int valueCount = hash.size();
if (finalSortedValues == null) { if (finalSortedValues == null) {
int valueCount = hash.size();
updateBytesUsed(); updateBytesUsed();
assert finalOrdMap == null && finalOrds == null; assert finalOrdMap == null && finalOrds == null;
finalSortedValues = hash.sort(); finalSortedValues = hash.sort();
finalOrds = pending.build(); finalOrds = pending.build();
finalOrdMap = new int[valueCount]; finalOrdMap = new int[valueCount];
}
for (int ord = 0; ord < valueCount; ord++) { for (int ord = 0; ord < valueCount; ord++) {
finalOrdMap[finalSortedValues[ord]] = ord; finalOrdMap[finalSortedValues[ord]] = ord;
} }
}
}
@Override
SortedDocValues getDocValues() {
finish();
return new BufferedSortedDocValues( return new BufferedSortedDocValues(
hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator()); hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator());
} }
private int[] sortDocValues(int maxDoc, Sorter.DocMap sortMap, SortedDocValues oldValues) private static int[] sortDocValues(int maxDoc, Sorter.DocMap sortMap, SortedDocValues oldValues)
throws IOException { throws IOException {
int[] ords = new int[maxDoc]; int[] ords = new int[maxDoc];
Arrays.fill(ords, -1); Arrays.fill(ords, -1);
@ -141,45 +146,48 @@ class SortedDocValuesWriter extends DocValuesWriter<SortedDocValues> {
@Override @Override
public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer) public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer)
throws IOException { throws IOException {
final int valueCount = hash.size(); finish();
if (finalOrds == null) {
updateBytesUsed(); dvConsumer.addSortedField(
finalSortedValues = hash.sort(); fieldInfo,
finalOrds = pending.build(); getDocValuesProducer(
finalOrdMap = new int[valueCount]; fieldInfo, hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField, sortMap));
for (int ord = 0; ord < valueCount; ord++) {
finalOrdMap[finalSortedValues[ord]] = ord;
}
} }
static DocValuesProducer getDocValuesProducer(
FieldInfo writerFieldInfo,
BytesRefHash hash,
PackedLongValues ords,
int[] sortedValues,
int[] ordMap,
DocsWithFieldSet docsWithField,
Sorter.DocMap sortMap)
throws IOException {
final int[] sorted; final int[] sorted;
if (sortMap != null) { if (sortMap != null) {
sorted = sorted =
sortDocValues( sortDocValues(
state.segmentInfo.maxDoc(), sortMap.size(),
sortMap, sortMap,
new BufferedSortedDocValues( new BufferedSortedDocValues(
hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator())); hash, ords, sortedValues, ordMap, docsWithField.iterator()));
} else { } else {
sorted = null; sorted = null;
} }
dvConsumer.addSortedField( return new EmptyDocValuesProducer() {
fieldInfo,
new EmptyDocValuesProducer() {
@Override @Override
public SortedDocValues getSorted(FieldInfo fieldInfoIn) { public SortedDocValues getSorted(FieldInfo fieldInfoIn) {
if (fieldInfoIn != fieldInfo) { if (fieldInfoIn != writerFieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo"); throw new IllegalArgumentException("wrong fieldInfo");
} }
final SortedDocValues buf = final SortedDocValues buf =
new BufferedSortedDocValues( new BufferedSortedDocValues(hash, ords, sortedValues, ordMap, docsWithField.iterator());
hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator());
if (sorted == null) { if (sorted == null) {
return buf; return buf;
} }
return new SortingSortedDocValues(buf, sorted); return new SortingSortedDocValues(buf, sorted);
} }
}); };
} }
static class BufferedSortedDocValues extends SortedDocValues { static class BufferedSortedDocValues extends SortedDocValues {

View File

@ -21,6 +21,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.NumericDocValuesWriter.BufferedNumericDocValues; import org.apache.lucene.index.NumericDocValuesWriter.BufferedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
@ -175,6 +176,20 @@ class SortedNumericDocValuesWriter extends DocValuesWriter<SortedNumericDocValue
valueCounts = finalValuesCount; valueCounts = finalValuesCount;
} }
if (valueCounts == null) {
DocValuesProducer singleValueProducer =
NumericDocValuesWriter.getDocValuesProducer(fieldInfo, values, docsWithField, sortMap);
dvConsumer.addSortedNumericField(
fieldInfo,
new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo fieldInfo) throws IOException {
return DocValues.singleton(singleValueProducer.getNumeric(fieldInfo));
}
});
return;
}
final LongValues sorted; final LongValues sorted;
if (sortMap != null) { if (sortMap != null) {
sorted = sorted =

View File

@ -22,6 +22,7 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.SortedDocValuesWriter.BufferedSortedDocValues; import org.apache.lucene.index.SortedDocValuesWriter.BufferedSortedDocValues;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
@ -162,8 +163,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
bytesUsed = newBytesUsed; bytesUsed = newBytesUsed;
} }
@Override private void finish() {
SortedSetDocValues getDocValues() {
if (finalOrds == null) { if (finalOrds == null) {
assert finalOrdCounts == null && finalSortedValues == null && finalOrdMap == null; assert finalOrdCounts == null && finalSortedValues == null && finalOrdMap == null;
finishCurrentDoc(); finishCurrentDoc();
@ -172,10 +172,15 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
finalOrdCounts = pendingCounts == null ? null : pendingCounts.build(); finalOrdCounts = pendingCounts == null ? null : pendingCounts.build();
finalSortedValues = hash.sort(); finalSortedValues = hash.sort();
finalOrdMap = new int[valueCount]; finalOrdMap = new int[valueCount];
}
for (int ord = 0; ord < finalOrdMap.length; ord++) { for (int ord = 0; ord < finalOrdMap.length; ord++) {
finalOrdMap[finalSortedValues[ord]] = ord; finalOrdMap[finalSortedValues[ord]] = ord;
} }
}
}
@Override
SortedSetDocValues getDocValues() {
finish();
return getValues( return getValues(
finalSortedValues, finalOrdMap, hash, finalOrds, finalOrdCounts, maxCount, docsWithField); finalSortedValues, finalOrdMap, hash, finalOrds, finalOrdCounts, maxCount, docsWithField);
} }
@ -200,27 +205,25 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
@Override @Override
public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer) public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer)
throws IOException { throws IOException {
final int valueCount = hash.size(); finish();
final PackedLongValues ords; final PackedLongValues ords = finalOrds;
final PackedLongValues ordCounts; final PackedLongValues ordCounts = finalOrdCounts;
final int[] sortedValues; final int[] sortedValues = finalSortedValues;
final int[] ordMap; final int[] ordMap = finalOrdMap;
if (finalOrds == null) { if (ordCounts == null) {
assert finalOrdCounts == null && finalSortedValues == null && finalOrdMap == null; DocValuesProducer singleValueProducer =
finishCurrentDoc(); SortedDocValuesWriter.getDocValuesProducer(
ords = pending.build(); fieldInfo, hash, ords, sortedValues, ordMap, docsWithField, sortMap);
ordCounts = pendingCounts == null ? null : pendingCounts.build(); dvConsumer.addSortedSetField(
sortedValues = hash.sort(); fieldInfo,
ordMap = new int[valueCount]; new EmptyDocValuesProducer() {
for (int ord = 0; ord < valueCount; ord++) { @Override
ordMap[sortedValues[ord]] = ord; public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
return DocValues.singleton(singleValueProducer.getSorted(fieldInfo));
} }
} else { });
ords = finalOrds; return;
ordCounts = finalOrdCounts;
sortedValues = finalSortedValues;
ordMap = finalOrdMap;
} }
final DocOrds docOrds; final DocOrds docOrds;