From 82096d39710673579322eefda7b399a4bfe6a8f4 Mon Sep 17 00:00:00 2001
From: Christoph Büscher
Date: Tue, 6 Oct 2020 14:34:39 +0200
Subject: [PATCH] Enable SourceLookup to leverage sequential stored fields reader (#63035) (#63316)

In #62509 we already plugged in faster sequential access to stored fields in
the fetch phase. This PR makes SourceLookup use the potentially faster field
reader as well. Rally experiments show that this speeds up cases where runtime
fields backed by "_source" are added via "docvalue_fields" or used in queries
or aggregations.

Closes #62621
---
 ...rcolatorMatchedSlotSubFetchPhaseTests.java | 24 ++++++++++++--
 .../search/lookup/SourceLookup.java           | 33 ++++++++++++++++---
 .../fetch/subphase/FetchSourcePhaseTests.java |  8 ++++-
 3 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorMatchedSlotSubFetchPhaseTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorMatchedSlotSubFetchPhaseTests.java
index df122bc8c32..eed8d31c2d4 100644
--- a/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorMatchedSlotSubFetchPhaseTests.java
+++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/PercolatorMatchedSlotSubFetchPhaseTests.java
@@ -65,7 +65,13 @@ public class PercolatorMatchedSlotSubFetchPhaseTests extends ESTestCase {
             LeafReaderContext context = reader.leaves().get(0);
             // A match:
             {
-                HitContext hit = new HitContext(new SearchHit(0), context, 0, new SourceLookup(), new HashMap<>());
+                HitContext hit = new HitContext(
+                    new SearchHit(0),
+                    context,
+                    0,
+                    new SourceLookup(),
+                    new HashMap<>()
+                );
                 PercolateQuery.QueryStore queryStore = ctx -> docId -> new TermQuery(new Term("field", "value"));
                 MemoryIndex memoryIndex = new MemoryIndex();
                 memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
@@ -86,7 +92,13 @@ public class PercolatorMatchedSlotSubFetchPhaseTests extends ESTestCase {
 
             // No match:
             {
-                HitContext hit = new HitContext(new SearchHit(0), context, 0, new SourceLookup(), new HashMap<>());
+                HitContext hit = new HitContext(
+                    new SearchHit(0),
+                    context,
+                    0,
+                    new SourceLookup(),
+                    new HashMap<>()
+                );
                 PercolateQuery.QueryStore queryStore = ctx -> docId -> new TermQuery(new Term("field", "value"));
                 MemoryIndex memoryIndex = new MemoryIndex();
                 memoryIndex.addField("field", "value1", new WhitespaceAnalyzer());
@@ -106,7 +118,13 @@ public class PercolatorMatchedSlotSubFetchPhaseTests extends ESTestCase {
 
             // No query:
             {
-                HitContext hit = new HitContext(new SearchHit(0), context, 0, new SourceLookup(), new HashMap<>());
+                HitContext hit = new HitContext(
+                    new SearchHit(0),
+                    context,
+                    0,
+                    new SourceLookup(),
+                    new HashMap<>()
+                );
                 PercolateQuery.QueryStore queryStore = ctx -> docId -> null;
                 MemoryIndex memoryIndex = new MemoryIndex();
                 memoryIndex.addField("field", "value", new WhitespaceAnalyzer());
diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
index 9001d77eebb..e5be3b21b78 100644
--- a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
+++ b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
@@ -21,15 +21,19 @@ package org.elasticsearch.search.lookup;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.common.CheckedBiConsumer;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.collect.Tuple;
+import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
 import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
 import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
 
+import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
@@ -40,6 +44,7 @@ import static java.util.Collections.emptyMap;
 
 public class SourceLookup implements Map {
 
     private LeafReader reader;
+    CheckedBiConsumer<Integer, FieldsVisitor, IOException> fieldReader;
 
     private int docId = -1;
@@ -75,7 +80,7 @@ public class SourceLookup implements Map {
         }
         try {
             FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true);
-            reader.document(docId, sourceFieldVisitor);
+            fieldReader.accept(docId, sourceFieldVisitor);
             BytesReference source = sourceFieldVisitor.source();
             if (source == null) {
                 this.source = emptyMap();
@@ -91,7 +96,7 @@ public class SourceLookup implements Map {
         return this.source;
     }
 
-    public static Tuple<XContentType, Map<String, Object>> sourceAsMapAndType(BytesReference source) throws ElasticsearchParseException {
+    private static Tuple<XContentType, Map<String, Object>> sourceAsMapAndType(BytesReference source) throws ElasticsearchParseException {
         return XContentHelper.convertToMap(source, false);
     }
 
@@ -99,12 +104,32 @@ public class SourceLookup implements Map {
         return sourceAsMapAndType(source).v2();
     }
 
-    public void setSegmentAndDocument(LeafReaderContext context, int docId) {
+    public void setSegmentAndDocument(
+        LeafReaderContext context,
+        int docId
+    ) {
         if (this.reader == context.reader() && this.docId == docId) {
             // if we are called with the same document, don't invalidate source
             return;
         }
-        this.reader = context.reader();
+        if (this.reader != context.reader()) {
+            this.reader = context.reader();
+            // only reset reader and fieldReader when reader changes
+            try {
+                if (context.reader() instanceof SequentialStoredFieldsLeafReader) {
+                    // All the docs to fetch are adjacent but Lucene stored fields are optimized
+                    // for random access and don't optimize for sequential access - except for merging.
+                    // So we do a little hack here and pretend we're going to do merges in order to
+                    // get better sequential access.
+                    SequentialStoredFieldsLeafReader lf = (SequentialStoredFieldsLeafReader) context.reader();
+                    fieldReader = lf.getSequentialStoredFieldsReader()::visitDocument;
+                } else {
+                    fieldReader = context.reader()::document;
+                }
+            } catch (IOException e) {
+                throw new UncheckedIOException(e);
+            }
+        }
         this.source = null;
         this.sourceAsBytes = null;
         this.docId = docId;
diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhaseTests.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhaseTests.java
index c6076aff8bb..b8cccdfb558 100644
--- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhaseTests.java
+++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhaseTests.java
@@ -159,7 +159,13 @@ public class FetchSourcePhaseTests extends ESTestCase {
         // We don't need a real index, just a LeafReaderContext which cannot be mocked.
         MemoryIndex index = new MemoryIndex();
         LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0);
-        HitContext hitContext = new HitContext(searchHit, leafReaderContext, 1, new SourceLookup(), new HashMap<>());
+        HitContext hitContext = new HitContext(
+            searchHit,
+            leafReaderContext,
+            1,
+            new SourceLookup(),
+            new HashMap<>()
+        );
         hitContext.sourceLookup().setSource(source == null ? null : BytesReference.bytes(source));
 
         FetchSourcePhase phase = new FetchSourcePhase();
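
Note on the mechanism (illustration, not part of the patch): the reader returned by
SequentialStoredFieldsLeafReader#getSequentialStoredFieldsReader is essentially the merge
instance of Lucene's StoredFieldsReader, which is what the "pretend we're going to do merges"
comment in SourceLookup refers to. The sketch below shows that plain-Lucene mechanism; the
class and helper method names are made up for illustration, and Elasticsearch goes through its
SequentialStoredFieldsLeafReader wrapper rather than casting to CodecReader directly, because
its leaf readers are usually wrapped.

    // Illustrative sketch only. Assumes Lucene 8.x on the classpath.
    import java.io.IOException;

    import org.apache.lucene.codecs.StoredFieldsReader;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.DocumentStoredFieldVisitor;
    import org.apache.lucene.index.CodecReader;
    import org.apache.lucene.index.LeafReader;

    public class SequentialStoredFieldsSketch {

        /** Loads the stored fields of one document, preferring sequential access when possible. */
        static Document loadStoredFields(LeafReader leafReader, int docId) throws IOException {
            DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
            if (leafReader instanceof CodecReader) {
                // The merge instance trades random-access friendliness for faster sequential
                // reads over adjacent docIDs - the "little hack" the patch relies on.
                StoredFieldsReader sequential = ((CodecReader) leafReader).getFieldsReader().getMergeInstance();
                sequential.visitDocument(docId, visitor);
            } else {
                // Fall back to the regular random-access stored fields path.
                leafReader.document(docId, visitor);
            }
            return visitor.getDocument();
        }
    }

In the patched SourceLookup the chosen fieldReader is cached per leaf reader ("only reset reader
and fieldReader when reader changes"), so the sequential reader is set up once per segment and
then reused for every adjacent document read from that segment.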