mirror of https://github.com/apache/lucene.git
LUCENE-9510: Don't pull a merge instance when flushing stored fields out-of-order. (#1872)
With recent changes to stored fields that split blocks into several sub-blocks, the merge instance has become much slower at random access, because it decompresses all sub-blocks whenever a document is accessed. Stored fields are likely to be accessed in random order at flush time when index sorting is enabled, so it is better not to use the merge instance there. On a synthetic benchmark with one stored field and one numeric doc-value field that is used for sorting and fed with random values, this change made indexing more than 4x faster.
This commit is contained in:
parent
fceab765c1
commit
97a4af6890
|
@@ -60,7 +60,8 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
|
|||
}
|
||||
StoredFieldsReader reader = codec.storedFieldsFormat()
|
||||
.fieldsReader(tmpDirectory, state.segmentInfo, state.fieldInfos, IOContext.DEFAULT);
|
||||
StoredFieldsReader mergeReader = reader.getMergeInstance();
|
||||
// Don't pull a merge instance, since merge instances optimize for
|
||||
// sequential access while we consume stored fields in random order here.
|
||||
StoredFieldsWriter sortWriter = codec.storedFieldsFormat()
|
||||
.fieldsWriter(state.directory, state.segmentInfo, IOContext.DEFAULT);
|
||||
try {
|
||||
|
@@ -68,7 +69,7 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
|
|||
CopyVisitor visitor = new CopyVisitor(sortWriter);
|
||||
for (int docID = 0; docID < state.segmentInfo.maxDoc(); docID++) {
|
||||
sortWriter.startDocument();
|
||||
mergeReader.visitDocument(sortMap.newToOld(docID), visitor);
|
||||
reader.visitDocument(sortMap.newToOld(docID), visitor);
|
||||
sortWriter.finishDocument();
|
||||
}
|
||||
sortWriter.finish(state.fieldInfos, state.segmentInfo.maxDoc());
|
||||
|
|
Loading…
Reference in New Issue