mirror of https://github.com/apache/lucene.git
Optimize flush of doc-value fields that are effectively single-valued when an index sort is configured. (#12037)
This iterates on #399 to also optimize the case when an index sort is configured. When the NYC taxis benchmark was cut over to the new numeric fields, [flush times](http://people.apache.org/~mikemccand/lucenebench/sparseResults.html#flush_times) stayed mostly the same with index sorting disabled but increased by 7-8% with index sorting enabled. I expect this change to address that slowdown.
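To make the optimized scenario concrete, here is a small hypothetical driver (field name, document count, and values are illustrative, not taken from the benchmark or this patch): an index sort is configured on the writer and a SORTED_NUMERIC doc-values field receives exactly one value per document, so it is effectively single-valued at flush time.

```java
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class EffectivelySingleValuedFlush {
  public static void main(String[] args) throws IOException {
    // Index sort on the same SORTED_NUMERIC field that every document fills with one value.
    IndexWriterConfig config =
        new IndexWriterConfig()
            .setIndexSort(new Sort(new SortedNumericSortField("pickup_ts", SortField.Type.LONG)));
    try (Directory dir = new ByteBuffersDirectory();
        IndexWriter writer = new IndexWriter(dir, config)) {
      for (long i = 0; i < 100_000; i++) {
        Document doc = new Document();
        // Exactly one value per document: the field is "effectively single-valued".
        doc.add(new SortedNumericDocValuesField("pickup_ts", i % 86_400));
        writer.addDocument(doc);
      }
      // Flushing the in-memory segment exercises the writer code paths changed below.
      writer.commit();
    }
  }
}
```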
parent ddd63d2da3
commit 6f477e5831
NumericDocValuesWriter.java

@@ -20,6 +20,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
 import java.io.IOException;
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.Counter;
@@ -99,30 +100,38 @@ class NumericDocValuesWriter extends DocValuesWriter<NumericDocValues> {
     if (finalValues == null) {
       finalValues = pending.build();
     }
+
+    dvConsumer.addNumericField(
+        fieldInfo, getDocValuesProducer(fieldInfo, finalValues, docsWithField, sortMap));
+  }
+
+  static DocValuesProducer getDocValuesProducer(
+      FieldInfo writerFieldInfo,
+      PackedLongValues values,
+      DocsWithFieldSet docsWithField,
+      Sorter.DocMap sortMap)
+      throws IOException {
     final NumericDVs sorted;
     if (sortMap != null) {
-      NumericDocValues oldValues =
-          new BufferedNumericDocValues(finalValues, docsWithField.iterator());
-      sorted = sortDocValues(state.segmentInfo.maxDoc(), sortMap, oldValues);
+      NumericDocValues oldValues = new BufferedNumericDocValues(values, docsWithField.iterator());
+      sorted = sortDocValues(sortMap.size(), sortMap, oldValues);
     } else {
       sorted = null;
     }
 
-    dvConsumer.addNumericField(
-        fieldInfo,
-        new EmptyDocValuesProducer() {
-          @Override
-          public NumericDocValues getNumeric(FieldInfo fieldInfo) {
-            if (fieldInfo != NumericDocValuesWriter.this.fieldInfo) {
-              throw new IllegalArgumentException("wrong fieldInfo");
-            }
-            if (sorted == null) {
-              return new BufferedNumericDocValues(finalValues, docsWithField.iterator());
-            } else {
-              return new SortingNumericDocValues(sorted);
-            }
-          }
-        });
+    return new EmptyDocValuesProducer() {
+      @Override
+      public NumericDocValues getNumeric(FieldInfo fieldInfo) {
+        if (fieldInfo != writerFieldInfo) {
+          throw new IllegalArgumentException("wrong fieldInfo");
+        }
+        if (sorted == null) {
+          return new BufferedNumericDocValues(values, docsWithField.iterator());
+        } else {
+          return new SortingNumericDocValues(sorted);
+        }
+      }
+    };
   }
 
   // iterates over the values we have in ram
SortedDocValuesWriter.java

@@ -22,6 +22,7 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
 import java.io.IOException;
 import java.util.Arrays;
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
@@ -109,24 +110,28 @@ class SortedDocValuesWriter extends DocValuesWriter<SortedDocValues> {
     bytesUsed = newBytesUsed;
   }
 
-  @Override
-  SortedDocValues getDocValues() {
-    int valueCount = hash.size();
+  private void finish() {
     if (finalSortedValues == null) {
+      int valueCount = hash.size();
       updateBytesUsed();
       assert finalOrdMap == null && finalOrds == null;
       finalSortedValues = hash.sort();
       finalOrds = pending.build();
       finalOrdMap = new int[valueCount];
+      for (int ord = 0; ord < valueCount; ord++) {
+        finalOrdMap[finalSortedValues[ord]] = ord;
+      }
     }
-    for (int ord = 0; ord < valueCount; ord++) {
-      finalOrdMap[finalSortedValues[ord]] = ord;
-    }
+  }
+
+  @Override
+  SortedDocValues getDocValues() {
+    finish();
     return new BufferedSortedDocValues(
         hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator());
   }
 
-  private int[] sortDocValues(int maxDoc, Sorter.DocMap sortMap, SortedDocValues oldValues)
+  private static int[] sortDocValues(int maxDoc, Sorter.DocMap sortMap, SortedDocValues oldValues)
       throws IOException {
     int[] ords = new int[maxDoc];
     Arrays.fill(ords, -1);
@@ -141,45 +146,48 @@ class SortedDocValuesWriter extends DocValuesWriter<SortedDocValues> {
   @Override
   public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer)
       throws IOException {
-    final int valueCount = hash.size();
-    if (finalOrds == null) {
-      updateBytesUsed();
-      finalSortedValues = hash.sort();
-      finalOrds = pending.build();
-      finalOrdMap = new int[valueCount];
-      for (int ord = 0; ord < valueCount; ord++) {
-        finalOrdMap[finalSortedValues[ord]] = ord;
-      }
-    }
+    finish();
 
+    dvConsumer.addSortedField(
+        fieldInfo,
+        getDocValuesProducer(
+            fieldInfo, hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField, sortMap));
+  }
+
+  static DocValuesProducer getDocValuesProducer(
+      FieldInfo writerFieldInfo,
+      BytesRefHash hash,
+      PackedLongValues ords,
+      int[] sortedValues,
+      int[] ordMap,
+      DocsWithFieldSet docsWithField,
+      Sorter.DocMap sortMap)
+      throws IOException {
     final int[] sorted;
     if (sortMap != null) {
       sorted =
           sortDocValues(
-              state.segmentInfo.maxDoc(),
+              sortMap.size(),
               sortMap,
               new BufferedSortedDocValues(
-                  hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator()));
+                  hash, ords, sortedValues, ordMap, docsWithField.iterator()));
     } else {
       sorted = null;
    }
-    dvConsumer.addSortedField(
-        fieldInfo,
-        new EmptyDocValuesProducer() {
-          @Override
-          public SortedDocValues getSorted(FieldInfo fieldInfoIn) {
-            if (fieldInfoIn != fieldInfo) {
-              throw new IllegalArgumentException("wrong fieldInfo");
-            }
-            final SortedDocValues buf =
-                new BufferedSortedDocValues(
-                    hash, finalOrds, finalSortedValues, finalOrdMap, docsWithField.iterator());
-            if (sorted == null) {
-              return buf;
-            }
-            return new SortingSortedDocValues(buf, sorted);
-          }
-        });
+    return new EmptyDocValuesProducer() {
+      @Override
+      public SortedDocValues getSorted(FieldInfo fieldInfoIn) {
+        if (fieldInfoIn != writerFieldInfo) {
+          throw new IllegalArgumentException("wrong fieldInfo");
+        }
+        final SortedDocValues buf =
+            new BufferedSortedDocValues(hash, ords, sortedValues, ordMap, docsWithField.iterator());
+        if (sorted == null) {
+          return buf;
+        }
+        return new SortingSortedDocValues(buf, sorted);
+      }
+    };
   }
 
   static class BufferedSortedDocValues extends SortedDocValues {
SortedNumericDocValuesWriter.java

@@ -21,6 +21,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
 import java.io.IOException;
 import java.util.Arrays;
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.index.NumericDocValuesWriter.BufferedNumericDocValues;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.ArrayUtil;
@@ -175,6 +176,20 @@ class SortedNumericDocValuesWriter extends DocValuesWriter<SortedNumericDocValue
       valueCounts = finalValuesCount;
     }
 
+    if (valueCounts == null) {
+      DocValuesProducer singleValueProducer =
+          NumericDocValuesWriter.getDocValuesProducer(fieldInfo, values, docsWithField, sortMap);
+      dvConsumer.addSortedNumericField(
+          fieldInfo,
+          new EmptyDocValuesProducer() {
+            @Override
+            public SortedNumericDocValues getSortedNumeric(FieldInfo fieldInfo) throws IOException {
+              return DocValues.singleton(singleValueProducer.getNumeric(fieldInfo));
+            }
+          });
+      return;
+    }
+
     final LongValues sorted;
     if (sortMap != null) {
       sorted =
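The SortedNumericDocValuesWriter hunk above is where the single-valued fast path kicks in: `valueCounts == null` means no document ever had more than one value, so the writer reuses the numeric producer (now reachable through the static `NumericDocValuesWriter.getDocValuesProducer`) and adapts it with `DocValues.singleton`. Below is a minimal sketch of that adaptation using public Lucene classes; the helper class and method names are illustrative, not part of Lucene.

```java
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SortedNumericDocValues;

// Illustrative adapter: exposes a producer that only knows how to return
// single-valued NumericDocValues through the SortedNumericDocValues API that
// the codec expects for a multi-valued field.
final class SingleValuedSortedNumericAdapter {
  static DocValuesProducer asSortedNumeric(DocValuesProducer singleValued) {
    return new EmptyDocValuesProducer() {
      @Override
      public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
        // DocValues.singleton wraps the numeric iterator without copying values,
        // so per-document work stays on the cheaper single-valued path.
        return DocValues.singleton(singleValued.getNumeric(field));
      }
    };
  }
}
```

The SortedSetDocValuesWriter hunk below applies the same idea to ordinals via `DocValues.singleton(singleValueProducer.getSorted(fieldInfo))`.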
SortedSetDocValuesWriter.java

@@ -22,6 +22,7 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
 import java.io.IOException;
 import java.util.Arrays;
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.index.SortedDocValuesWriter.BufferedSortedDocValues;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.ArrayUtil;
@@ -162,8 +163,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
     bytesUsed = newBytesUsed;
   }
 
-  @Override
-  SortedSetDocValues getDocValues() {
+  private void finish() {
     if (finalOrds == null) {
       assert finalOrdCounts == null && finalSortedValues == null && finalOrdMap == null;
       finishCurrentDoc();
@@ -172,10 +172,15 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
       finalOrdCounts = pendingCounts == null ? null : pendingCounts.build();
       finalSortedValues = hash.sort();
       finalOrdMap = new int[valueCount];
+      for (int ord = 0; ord < finalOrdMap.length; ord++) {
+        finalOrdMap[finalSortedValues[ord]] = ord;
+      }
     }
-    for (int ord = 0; ord < finalOrdMap.length; ord++) {
-      finalOrdMap[finalSortedValues[ord]] = ord;
-    }
+  }
+
+  @Override
+  SortedSetDocValues getDocValues() {
+    finish();
     return getValues(
         finalSortedValues, finalOrdMap, hash, finalOrds, finalOrdCounts, maxCount, docsWithField);
   }
@@ -200,27 +205,25 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
   @Override
   public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer)
       throws IOException {
-    final int valueCount = hash.size();
-    final PackedLongValues ords;
-    final PackedLongValues ordCounts;
-    final int[] sortedValues;
-    final int[] ordMap;
+    finish();
+    final PackedLongValues ords = finalOrds;
+    final PackedLongValues ordCounts = finalOrdCounts;
+    final int[] sortedValues = finalSortedValues;
+    final int[] ordMap = finalOrdMap;
 
-    if (finalOrds == null) {
-      assert finalOrdCounts == null && finalSortedValues == null && finalOrdMap == null;
-      finishCurrentDoc();
-      ords = pending.build();
-      ordCounts = pendingCounts == null ? null : pendingCounts.build();
-      sortedValues = hash.sort();
-      ordMap = new int[valueCount];
-      for (int ord = 0; ord < valueCount; ord++) {
-        ordMap[sortedValues[ord]] = ord;
-      }
-    } else {
-      ords = finalOrds;
-      ordCounts = finalOrdCounts;
-      sortedValues = finalSortedValues;
-      ordMap = finalOrdMap;
+    if (ordCounts == null) {
+      DocValuesProducer singleValueProducer =
+          SortedDocValuesWriter.getDocValuesProducer(
+              fieldInfo, hash, ords, sortedValues, ordMap, docsWithField, sortMap);
+      dvConsumer.addSortedSetField(
+          fieldInfo,
+          new EmptyDocValuesProducer() {
+            @Override
+            public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
+              return DocValues.singleton(singleValueProducer.getSorted(fieldInfo));
+            }
+          });
+      return;
     }
 
     final DocOrds docOrds;