mirror of https://github.com/apache/lucene.git
Optimize flush of doc-value fields that are effectively single-valued when an index sort is configured. (#12037)
This iterates on #399 to also optimize the case when an index sort is configured. When cutting over the NYC taxis benchmark to the new numeric fields, [flush times](http://people.apache.org/~mikemccand/lucenebench/sparseResults.html#flush_times) stayed mostly the same when index sorting was disabled but increased by 7-8% when index sorting was enabled. I expect this change to address that slowdown.
This commit is contained in:
parent
ddd63d2da3
commit
6f477e5831
|
@ -20,6 +20,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||||
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.util.BitSet;
|
import org.apache.lucene.util.BitSet;
|
||||||
import org.apache.lucene.util.Counter;
|
import org.apache.lucene.util.Counter;
|
||||||
|
@ -99,30 +100,38 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
|
||||||
if (finalValues == null) {
|
if (finalValues == null) {
|
||||||
finalValues = pending.build();
|
finalValues = pending.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dvConsumer.addNumericField(
|
||||||
|
fieldInfo, getDocValuesProducer(fieldInfo, finalValues, docsWithField, sortMap));
|
||||||
|
}
|
||||||
|
|
||||||
|
static DocValuesProducer getDocValuesProducer(
|
||||||
|
FieldInfo writerFieldInfo,
|
||||||
|
PackedLongValues values,
|
||||||
|
DocsWithFieldSet docsWithField,
|
||||||
|
Sorter.DocMap sortMap)
|
||||||
|
throws IOException {
|
||||||
final NumericDVs sorted;
|
final NumericDVs sorted;
|
||||||
if (sortMap != null) {
|
if (sortMap != null) {
|
||||||
NumericDocValues oldValues =
|
NumericDocValues oldValues = new BufferedNumericDocValues(values, docsWithField.iterator());
|
||||||
new BufferedNumericDocValues(finalValues, docsWithField.iterator());
|
sorted = sortDocValues(sortMap.size(), sortMap, oldValues);
|
||||||
sorted = sortDocValues(state.segmentInfo.maxDoc(), sortMap, oldValues);
|
|
||||||
} else {
|
} else {
|
||||||
sorted = null;
|
sorted = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
dvConsumer.addNumericField(
|
return new EmptyDocValuesProducer() {
|
||||||
fieldInfo,
|
|
||||||
new EmptyDocValuesProducer() {
|
|
||||||
@Override
|
@Override
|
||||||
public NumericDocValues getNumeric(FieldInfo fieldInfo) {
|
public NumericDocValues getNumeric(FieldInfo fieldInfo) {
|
||||||
if (fieldInfo != NumericDocValuesWriter.this.fieldInfo) {
|
if (fieldInfo != writerFieldInfo) {
|
||||||
throw new IllegalArgumentException("wrong fieldInfo");
|
throw new IllegalArgumentException("wrong fieldInfo");
|
||||||
}
|
}
|
||||||
if (sorted == null) {
|
if (sorted == null) {
|
||||||
return new BufferedNumericDocValues(finalValues, docsWithField.iterator());
|
return new BufferedNumericDocValues(values, docsWithField.iterator());
|
||||||
} else {
|
} else {
|
||||||
return new SortingNumericDocValues(sorted);
|
return new SortingNumericDocValues(sorted);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// iterates over the values we have in ram
|
// iterates over the values we have in ram
|
||||||
|
|
|
@ -22,6 +22,7 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||||
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.util.ByteBlockPool;
|
import org.apache.lucene.util.ByteBlockPool;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -109,24 +110,28 @@ class SortedDocValuesWriter extends DocValuesWriter<SortedDocValues> {
|
||||||
bytesUsed = newBytesUsed;
|
bytesUsed = newBytesUsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
private void finish() {
|
||||||
SortedDocValues getDocValues() {
|
|
||||||
int valueCount = hash.size();
|
|
||||||
if (finalSortedValues == null) {
|
if (finalSortedValues == null) {
|
||||||
|
int valueCount = hash.size();
|
||||||
updateBytesUsed();
|
updateBytesUsed();
|
||||||
assert finalOrdMap == null && finalOrds == null;
|
assert finalOrdMap == null && finalOrds == null;
|
||||||
finalSortedValues = hash.sort();
|
finalSortedValues = hash.sort();
|
||||||
finalOrds = pending.build();
|
finalOrds = pending.build();
|
||||||
finalOrdMap = new int[valueCount];
|
finalOrdMap = new int[valueCount];
|
||||||
}
|
|
||||||
for (int ord = 0; ord < valueCount; ord++) {
|
for (int ord = 0; ord < valueCount; ord++) {
|
||||||
finalOrdMap[finalSortedValues[ord]] = ord;
|
finalOrdMap[finalSortedValues[ord]] = ord;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
SortedDocValues getDocValues() {
|
||||||
|
finish();
|
||||||
return new BufferedSortedDocValues(
|
return new BufferedSortedDocValues(
|
||||||
hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator());
|
hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator());
|
||||||
}
|
}
|
||||||
|
|
||||||
private int[] sortDocValues(int maxDoc, Sorter.DocMap sortMap, SortedDocValues oldValues)
|
private static int[] sortDocValues(int maxDoc, Sorter.DocMap sortMap, SortedDocValues oldValues)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
int[] ords = new int[maxDoc];
|
int[] ords = new int[maxDoc];
|
||||||
Arrays.fill(ords, -1);
|
Arrays.fill(ords, -1);
|
||||||
|
@ -141,45 +146,48 @@ class SortedDocValuesWriter extends DocValuesWriter<SortedDocValues> {
|
||||||
@Override
|
@Override
|
||||||
public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer)
|
public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final int valueCount = hash.size();
|
finish();
|
||||||
if (finalOrds == null) {
|
|
||||||
updateBytesUsed();
|
dvConsumer.addSortedField(
|
||||||
finalSortedValues = hash.sort();
|
fieldInfo,
|
||||||
finalOrds = pending.build();
|
getDocValuesProducer(
|
||||||
finalOrdMap = new int[valueCount];
|
fieldInfo, hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField, sortMap));
|
||||||
for (int ord = 0; ord < valueCount; ord++) {
|
|
||||||
finalOrdMap[finalSortedValues[ord]] = ord;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static DocValuesProducer getDocValuesProducer(
|
||||||
|
FieldInfo writerFieldInfo,
|
||||||
|
BytesRefHash hash,
|
||||||
|
PackedLongValues ords,
|
||||||
|
int[] sortedValues,
|
||||||
|
int[] ordMap,
|
||||||
|
DocsWithFieldSet docsWithField,
|
||||||
|
Sorter.DocMap sortMap)
|
||||||
|
throws IOException {
|
||||||
final int[] sorted;
|
final int[] sorted;
|
||||||
if (sortMap != null) {
|
if (sortMap != null) {
|
||||||
sorted =
|
sorted =
|
||||||
sortDocValues(
|
sortDocValues(
|
||||||
state.segmentInfo.maxDoc(),
|
sortMap.size(),
|
||||||
sortMap,
|
sortMap,
|
||||||
new BufferedSortedDocValues(
|
new BufferedSortedDocValues(
|
||||||
hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator()));
|
hash, ords, sortedValues, ordMap, docsWithField.iterator()));
|
||||||
} else {
|
} else {
|
||||||
sorted = null;
|
sorted = null;
|
||||||
}
|
}
|
||||||
dvConsumer.addSortedField(
|
return new EmptyDocValuesProducer() {
|
||||||
fieldInfo,
|
|
||||||
new EmptyDocValuesProducer() {
|
|
||||||
@Override
|
@Override
|
||||||
public SortedDocValues getSorted(FieldInfo fieldInfoIn) {
|
public SortedDocValues getSorted(FieldInfo fieldInfoIn) {
|
||||||
if (fieldInfoIn != fieldInfo) {
|
if (fieldInfoIn != writerFieldInfo) {
|
||||||
throw new IllegalArgumentException("wrong fieldInfo");
|
throw new IllegalArgumentException("wrong fieldInfo");
|
||||||
}
|
}
|
||||||
final SortedDocValues buf =
|
final SortedDocValues buf =
|
||||||
new BufferedSortedDocValues(
|
new BufferedSortedDocValues(hash, ords, sortedValues, ordMap, docsWithField.iterator());
|
||||||
hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator());
|
|
||||||
if (sorted == null) {
|
if (sorted == null) {
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
return new SortingSortedDocValues(buf, sorted);
|
return new SortingSortedDocValues(buf, sorted);
|
||||||
}
|
}
|
||||||
});
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
static class BufferedSortedDocValues extends SortedDocValues {
|
static class BufferedSortedDocValues extends SortedDocValues {
|
||||||
|
|
|
@ -21,6 +21,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||||
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
import org.apache.lucene.index.NumericDocValuesWriter.BufferedNumericDocValues;
|
import org.apache.lucene.index.NumericDocValuesWriter.BufferedNumericDocValues;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
@ -175,6 +176,20 @@ class SortedNumericDocValuesWriter extends DocValuesWriter<SortedNumericDocValue
|
||||||
valueCounts = finalValuesCount;
|
valueCounts = finalValuesCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (valueCounts == null) {
|
||||||
|
DocValuesProducer singleValueProducer =
|
||||||
|
NumericDocValuesWriter.getDocValuesProducer(fieldInfo, values, docsWithField, sortMap);
|
||||||
|
dvConsumer.addSortedNumericField(
|
||||||
|
fieldInfo,
|
||||||
|
new EmptyDocValuesProducer() {
|
||||||
|
@Override
|
||||||
|
public SortedNumericDocValues getSortedNumeric(FieldInfo fieldInfo) throws IOException {
|
||||||
|
return DocValues.singleton(singleValueProducer.getNumeric(fieldInfo));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
final LongValues sorted;
|
final LongValues sorted;
|
||||||
if (sortMap != null) {
|
if (sortMap != null) {
|
||||||
sorted =
|
sorted =
|
||||||
|
|
|
@ -22,6 +22,7 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||||
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
import org.apache.lucene.index.SortedDocValuesWriter.BufferedSortedDocValues;
|
import org.apache.lucene.index.SortedDocValuesWriter.BufferedSortedDocValues;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
@ -162,8 +163,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
bytesUsed = newBytesUsed;
|
bytesUsed = newBytesUsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
private void finish() {
|
||||||
SortedSetDocValues getDocValues() {
|
|
||||||
if (finalOrds == null) {
|
if (finalOrds == null) {
|
||||||
assert finalOrdCounts == null && finalSortedValues == null && finalOrdMap == null;
|
assert finalOrdCounts == null && finalSortedValues == null && finalOrdMap == null;
|
||||||
finishCurrentDoc();
|
finishCurrentDoc();
|
||||||
|
@ -172,10 +172,15 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
finalOrdCounts = pendingCounts == null ? null : pendingCounts.build();
|
finalOrdCounts = pendingCounts == null ? null : pendingCounts.build();
|
||||||
finalSortedValues = hash.sort();
|
finalSortedValues = hash.sort();
|
||||||
finalOrdMap = new int[valueCount];
|
finalOrdMap = new int[valueCount];
|
||||||
}
|
|
||||||
for (int ord = 0; ord < finalOrdMap.length; ord++) {
|
for (int ord = 0; ord < finalOrdMap.length; ord++) {
|
||||||
finalOrdMap[finalSortedValues[ord]] = ord;
|
finalOrdMap[finalSortedValues[ord]] = ord;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
SortedSetDocValues getDocValues() {
|
||||||
|
finish();
|
||||||
return getValues(
|
return getValues(
|
||||||
finalSortedValues, finalOrdMap, hash, finalOrds, finalOrdCounts, maxCount, docsWithField);
|
finalSortedValues, finalOrdMap, hash, finalOrds, finalOrdCounts, maxCount, docsWithField);
|
||||||
}
|
}
|
||||||
|
@ -200,27 +205,25 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
|
||||||
@Override
|
@Override
|
||||||
public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer)
|
public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final int valueCount = hash.size();
|
finish();
|
||||||
final PackedLongValues ords;
|
final PackedLongValues ords = finalOrds;
|
||||||
final PackedLongValues ordCounts;
|
final PackedLongValues ordCounts = finalOrdCounts;
|
||||||
final int[] sortedValues;
|
final int[] sortedValues = finalSortedValues;
|
||||||
final int[] ordMap;
|
final int[] ordMap = finalOrdMap;
|
||||||
|
|
||||||
if (finalOrds == null) {
|
if (ordCounts == null) {
|
||||||
assert finalOrdCounts == null && finalSortedValues == null && finalOrdMap == null;
|
DocValuesProducer singleValueProducer =
|
||||||
finishCurrentDoc();
|
SortedDocValuesWriter.getDocValuesProducer(
|
||||||
ords = pending.build();
|
fieldInfo, hash, ords, sortedValues, ordMap, docsWithField, sortMap);
|
||||||
ordCounts = pendingCounts == null ? null : pendingCounts.build();
|
dvConsumer.addSortedSetField(
|
||||||
sortedValues = hash.sort();
|
fieldInfo,
|
||||||
ordMap = new int[valueCount];
|
new EmptyDocValuesProducer() {
|
||||||
for (int ord = 0; ord < valueCount; ord++) {
|
@Override
|
||||||
ordMap[sortedValues[ord]] = ord;
|
public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
|
||||||
|
return DocValues.singleton(singleValueProducer.getSorted(fieldInfo));
|
||||||
}
|
}
|
||||||
} else {
|
});
|
||||||
ords = finalOrds;
|
return;
|
||||||
ordCounts = finalOrdCounts;
|
|
||||||
sortedValues = finalSortedValues;
|
|
||||||
ordMap = finalOrdMap;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final DocOrds docOrds;
|
final DocOrds docOrds;
|
||||||
|
|
Loading…
Reference in New Issue