LUCENE-9510: Don't pull a merge instance when flushing stored fields out-of-order. (#1872)

With recent changes to stored fields that split blocks into several sub-blocks, the merge instance has become much slower at random access, since it decompresses all sub-blocks of a block when accessing a single document. Since stored fields are likely to be accessed in random order at flush time when index sorting is enabled, it's better not to use the merge instance. On a synthetic benchmark with one stored field and one numeric doc-value field that is used for sorting and fed with random values, this made indexing more than 4x faster.
2020-09-14 18:07:04 +02:00 · 2020-09-14 18:07:04 +02:00 · 97a4af6890
parent fceab765c1
commit 97a4af6890
1 changed file with 3 additions and 2 deletions
--- a/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
@@ -60,7 +60,8 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
    }
    StoredFieldsReader reader = codec.storedFieldsFormat()
        .fieldsReader(tmpDirectory, state.segmentInfo, state.fieldInfos, IOContext.DEFAULT);
-    StoredFieldsReader mergeReader = reader.getMergeInstance();
+    // Don't pull a merge instance, since merge instances optimize for
+    // sequential access while we consume stored fields in random order here.
    StoredFieldsWriter sortWriter = codec.storedFieldsFormat()
        .fieldsWriter(state.directory, state.segmentInfo, IOContext.DEFAULT);
    try {
@@ -68,7 +69,7 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
      CopyVisitor visitor = new CopyVisitor(sortWriter);
      for (int docID = 0; docID < state.segmentInfo.maxDoc(); docID++) {
        sortWriter.startDocument();
-        mergeReader.visitDocument(sortMap.newToOld(docID), visitor);
+        reader.visitDocument(sortMap.newToOld(docID), visitor);
        sortWriter.finishDocument();
      }
      sortWriter.finish(state.fieldInfos, state.segmentInfo.maxDoc());