Enable SourceLookup to leverage sequential stored fields reader (#63035) (#63316)

In #62509 we already plugged in faster sequential access to stored fields in the fetch phase.
This PR makes SourceLookup use the potentially faster stored fields reader as well.
Rally experiments show that this speeds up requests in which runtime fields that read
"_source" are fetched via "docvalue_fields" or are used in queries or aggregations.

Closes #62621
Christoph Büscher 2020-10-06 14:34:39 +02:00 committed by GitHub
parent bea0ead08a
commit 82096d3971
3 changed files with 57 additions and 8 deletions
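
In short, the change teaches SourceLookup to pick its stored fields reader per segment. A
standalone sketch of that selection logic follows; the wrapper class and method name are
illustrative only, while the types and calls (SequentialStoredFieldsLeafReader,
getSequentialStoredFieldsReader(), visitDocument, document) are the ones used in the
SourceLookup.java diff below.

    import java.io.IOException;
    import java.io.UncheckedIOException;

    import org.apache.lucene.index.LeafReaderContext;
    import org.elasticsearch.common.CheckedBiConsumer;
    import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
    import org.elasticsearch.index.fieldvisitor.FieldsVisitor;

    // Illustrative helper (not part of the commit): picks the stored fields
    // reader for a leaf. Lucene's stored fields are optimized for random
    // access, but the "merge instance" of the reader is tuned for visiting
    // documents in increasing docID order, which is the dominant access
    // pattern when loading _source during the fetch phase.
    class StoredFieldsReaderChooser {
        static CheckedBiConsumer<Integer, FieldsVisitor, IOException> choose(LeafReaderContext context) {
            if (context.reader() instanceof SequentialStoredFieldsLeafReader) {
                SequentialStoredFieldsLeafReader leaf = (SequentialStoredFieldsLeafReader) context.reader();
                try {
                    // Pretend we are merging to get the sequential-access reader.
                    return leaf.getSequentialStoredFieldsReader()::visitDocument;
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }
            // Fall back to the leaf reader's regular random-access path.
            return context.reader()::document;
        }
    }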

PercolatorMatchedSlotSubFetchPhaseTests.java

@@ -65,7 +65,13 @@ public class PercolatorMatchedSlotSubFetchPhaseTests extends ESTestCase {
         LeafReaderContext context = reader.leaves().get(0);
         // A match:
         {
-            HitContext hit = new HitContext(new SearchHit(0), context, 0, new SourceLookup(), new HashMap<>());
+            HitContext hit = new HitContext(
+                new SearchHit(0),
+                context,
+                0,
+                new SourceLookup(),
+                new HashMap<>()
+            );
             PercolateQuery.QueryStore queryStore = ctx -> docId -> new TermQuery(new Term("field", "value"));
             MemoryIndex memoryIndex = new MemoryIndex();
             memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
@@ -86,7 +92,13 @@ public class PercolatorMatchedSlotSubFetchPhaseTests extends ESTestCase {
         // No match:
         {
-            HitContext hit = new HitContext(new SearchHit(0), context, 0, new SourceLookup(), new HashMap<>());
+            HitContext hit = new HitContext(
+                new SearchHit(0),
+                context,
+                0,
+                new SourceLookup(),
+                new HashMap<>()
+            );
             PercolateQuery.QueryStore queryStore = ctx -> docId -> new TermQuery(new Term("field", "value"));
             MemoryIndex memoryIndex = new MemoryIndex();
             memoryIndex.addField("field", "value1", new WhitespaceAnalyzer());
@@ -106,7 +118,13 @@ public class PercolatorMatchedSlotSubFetchPhaseTests extends ESTestCase {
         // No query:
         {
-            HitContext hit = new HitContext(new SearchHit(0), context, 0, new SourceLookup(), new HashMap<>());
+            HitContext hit = new HitContext(
+                new SearchHit(0),
+                context,
+                0,
+                new SourceLookup(),
+                new HashMap<>()
+            );
             PercolateQuery.QueryStore queryStore = ctx -> docId -> null;
             MemoryIndex memoryIndex = new MemoryIndex();
             memoryIndex.addField("field", "value", new WhitespaceAnalyzer());

SourceLookup.java

@@ -21,15 +21,19 @@ package org.elasticsearch.search.lookup;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.common.CheckedBiConsumer;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.collect.Tuple;
+import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
 import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
 import org.elasticsearch.search.fetch.subphase.FetchSourceContext;

+import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
@@ -40,6 +44,7 @@ import static java.util.Collections.emptyMap;
 public class SourceLookup implements Map {

     private LeafReader reader;
+    CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader;
     private int docId = -1;
@ -75,7 +80,7 @@ public class SourceLookup implements Map {
}
try {
FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true);
reader.document(docId, sourceFieldVisitor);
fieldReader.accept(docId, sourceFieldVisitor);
BytesReference source = sourceFieldVisitor.source();
if (source == null) {
this.source = emptyMap();
@@ -91,7 +96,7 @@ public class SourceLookup implements Map {
         return this.source;
     }

-    public static Tuple<XContentType, Map<String, Object>> sourceAsMapAndType(BytesReference source) throws ElasticsearchParseException {
+    private static Tuple<XContentType, Map<String, Object>> sourceAsMapAndType(BytesReference source) throws ElasticsearchParseException {
         return XContentHelper.convertToMap(source, false);
     }
@@ -99,12 +104,32 @@ public class SourceLookup implements Map {
         return sourceAsMapAndType(source).v2();
     }

-    public void setSegmentAndDocument(LeafReaderContext context, int docId) {
+    public void setSegmentAndDocument(
+        LeafReaderContext context,
+        int docId
+    ) {
         if (this.reader == context.reader() && this.docId == docId) {
             // if we are called with the same document, don't invalidate source
             return;
         }
-        this.reader = context.reader();
+        if (this.reader != context.reader()) {
+            // only reset reader and fieldReader when reader changes
+            this.reader = context.reader();
+            try {
+                if (context.reader() instanceof SequentialStoredFieldsLeafReader) {
+                    // All the docs to fetch are adjacent, but Lucene stored fields are optimized
+                    // for random access and don't optimize for sequential access - except for
+                    // merging. So we do a little hack here and pretend we're going to do merges
+                    // in order to get better sequential access.
+                    SequentialStoredFieldsLeafReader lf = (SequentialStoredFieldsLeafReader) context.reader();
+                    fieldReader = lf.getSequentialStoredFieldsReader()::visitDocument;
+                } else {
+                    fieldReader = context.reader()::document;
+                }
+            } catch (IOException e) {
+                throw new UncheckedIOException(e);
+            }
+        }
         this.source = null;
         this.sourceAsBytes = null;
         this.docId = docId;
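
Callers benefit when they walk docIDs in increasing order within a segment, since
SourceLookup now caches the chosen fieldReader per leaf reader. A hypothetical driver
loop for illustration (the scanner class and field name are made up; setSegmentAndDocument
and the raw Map view of SourceLookup are from the class above):

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.elasticsearch.search.lookup.SourceLookup;

    // Hypothetical caller (not part of the commit): reads a _source field for
    // every document, visiting docIDs in ascending order per segment so the
    // sequential stored fields reader pays off.
    class SourceScanner {
        static void scan(IndexReader indexReader, String fieldName) {
            SourceLookup lookup = new SourceLookup();
            for (LeafReaderContext leaf : indexReader.leaves()) {
                for (int docId = 0; docId < leaf.reader().maxDoc(); docId++) {
                    lookup.setSegmentAndDocument(leaf, docId);
                    // SourceLookup implements Map; get() lazily loads and
                    // parses _source through the cached fieldReader.
                    Object value = lookup.get(fieldName);
                    // ... hand 'value' to a runtime field, query, or agg ...
                }
            }
        }
    }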

FetchSourcePhaseTests.java

@@ -159,7 +159,13 @@ public class FetchSourcePhaseTests extends ESTestCase {
         // We don't need a real index, just a LeafReaderContext which cannot be mocked.
         MemoryIndex index = new MemoryIndex();
         LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);
-        HitContext hitContext = new HitContext(searchHit, leafReaderContext, 1, new SourceLookup(), new HashMap<>());
+        HitContext hitContext = new HitContext(
+            searchHit,
+            leafReaderContext,
+            1,
+            new SourceLookup(),
+            new HashMap<>()
+        );
         hitContext.sourceLookup().setSource(source == null ? null : BytesReference.bytes(source));
         FetchSourcePhase phase = new FetchSourcePhase();