inner hits: Only access stored fields when needed

Stored fields were still being accessed for nested inner hits even if the _source was not requested.
This was done to figure out the id of the root document. However this is already known higher up the stack.
So instead this change adds the id to the nested search context, so that it is no longer required to be fetched via the stored fields.

In case the _source is large and no source is requested then hot threads like these ones would still appear:

```
100.3% (501.3ms out of 500ms) cpu usage by thread 'elasticsearch[AfXKKfq][search][T#6]'
     2/10 snapshots sharing following 22 elements
       org.apache.lucene.store.DataInput.skipBytes(DataInput.java:352)
       org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.skipField(CompressingStoredFieldsReader.java:246)
       org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.visitDocument(CompressingStoredFieldsReader.java:601)
       org.apache.lucene.index.CodecReader.document(CodecReader.java:88)
       org.apache.lucene.index.FilterLeafReader.document(FilterLeafReader.java:411)
       org.elasticsearch.search.fetch.FetchPhase.loadStoredFields(FetchPhase.java:347)
       org.elasticsearch.search.fetch.FetchPhase.createNestedSearchHit(FetchPhase.java:219)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:150)
       org.elasticsearch.search.fetch.subphase.InnerHitsFetchSubPhase.hitsExecute(InnerHitsFetchSubPhase.java:73)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:166)
       org.elasticsearch.search.fetch.subphase.InnerHitsFetchSubPhase.hitsExecute(InnerHitsFetchSubPhase.java:73)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:166)
       org.elasticsearch.search.SearchService.executeFetchPhase(SearchService.java:422)
```

and:

```
8/10 snapshots sharing following 27 elements
       org.apache.lucene.codecs.compressing.LZ4.decompress(LZ4.java:135)
       org.apache.lucene.codecs.compressing.CompressionMode$4.decompress(CompressionMode.java:138)
       org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader$BlockState$1.fillBuffer(CompressingStoredFieldsReader.java:531)
       org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader$BlockState$1.readBytes(CompressingStoredFieldsReader.java:550)
       org.apache.lucene.store.DataInput.readBytes(DataInput.java:87)
       org.apache.lucene.store.DataInput.skipBytes(DataInput.java:350)
       org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.skipField(CompressingStoredFieldsReader.java:246)
       org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader.visitDocument(CompressingStoredFieldsReader.java:601)
       org.apache.lucene.index.CodecReader.document(CodecReader.java:88)
       org.apache.lucene.index.FilterLeafReader.document(FilterLeafReader.java:411)
       org.elasticsearch.search.fetch.FetchPhase.loadStoredFields(FetchPhase.java:347)
       org.elasticsearch.search.fetch.FetchPhase.createNestedSearchHit(FetchPhase.java:219)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:150)
       org.elasticsearch.search.fetch.subphase.InnerHitsFetchSubPhase.hitsExecute(InnerHitsFetchSubPhase.java:73)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:166)
       org.elasticsearch.search.fetch.subphase.InnerHitsFetchSubPhase.hitsExecute(InnerHitsFetchSubPhase.java:73)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:166)
       org.elasticsearch.search.SearchService.executeFetchPhase(SearchService.java:422)
```
This commit is contained in:
Martijn van Groningen 2017-07-24 16:14:32 +02:00
parent 7e08753bd2
commit a9ae52e78b
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
4 changed files with 56 additions and 21 deletions

View File

@ -400,7 +400,6 @@
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]aggregations[/\\]support[/\\]AggregationPath.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]dfs[/\\]AggregatedDfs.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]dfs[/\\]DfsSearchResult.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]fetch[/\\]FetchPhase.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]internal[/\\]ShardSearchTransportRequest.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]lookup[/\\]FieldLookup.java" checks="LineLength" />
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]search[/\\]lookup[/\\]LeafDocLookup.java" checks="LineLength" />

View File

@ -42,10 +42,12 @@ import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.ObjectMapper;
import org.elasticsearch.index.mapper.SourceFieldMapper;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.SearchPhase;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.fetch.subphase.InnerHitsContext;
import org.elasticsearch.search.fetch.subphase.InnerHitsFetchSubPhase;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SourceLookup;
@ -99,7 +101,7 @@ public class FetchPhase implements SearchPhase {
for (String fieldName : context.storedFieldsContext().fieldNames()) {
if (fieldName.equals(SourceFieldMapper.NAME)) {
FetchSourceContext fetchSourceContext = context.hasFetchSourceContext() ? context.fetchSourceContext()
: FetchSourceContext.FETCH_SOURCE;
: FetchSourceContext.FETCH_SOURCE;
context.fetchSourceContext(new FetchSourceContext(true, fetchSourceContext.includes(), fetchSourceContext.excludes()));
continue;
}
@ -128,7 +130,7 @@ public class FetchPhase implements SearchPhase {
fieldsVisitor = new FieldsVisitor(loadSource);
} else {
fieldsVisitor = new CustomFieldsVisitor(fieldNames == null ? Collections.emptySet() : fieldNames,
fieldNamePatterns == null ? Collections.emptyList() : fieldNamePatterns, loadSource);
fieldNamePatterns == null ? Collections.emptyList() : fieldNamePatterns, loadSource);
}
}
@ -136,7 +138,7 @@ public class FetchPhase implements SearchPhase {
SearchHit[] hits = new SearchHit[context.docIdsToLoadSize()];
FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext();
for (int index = 0; index < context.docIdsToLoadSize(); index++) {
if(context.isCancelled()) {
if (context.isCancelled()) {
throw new TaskCancelledException("cancelled");
}
int docId = context.docIdsToLoad()[context.docIdsToLoadFrom() + index];
@ -147,7 +149,8 @@ public class FetchPhase implements SearchPhase {
final SearchHit searchHit;
int rootDocId = findRootDocumentIfNested(context, subReaderContext, subDocId);
if (rootDocId != -1) {
searchHit = createNestedSearchHit(context, docId, subDocId, rootDocId, fieldNames, fieldNamePatterns, subReaderContext);
searchHit = createNestedSearchHit(context, docId, subDocId, rootDocId, fieldNames, fieldNamePatterns,
subReaderContext);
} else {
searchHit = createSearchHit(context, fieldsVisitor, docId, subDocId, subReaderContext);
}
@ -179,7 +182,8 @@ public class FetchPhase implements SearchPhase {
return -1;
}
private SearchHit createSearchHit(SearchContext context, FieldsVisitor fieldsVisitor, int docId, int subDocId, LeafReaderContext subReaderContext) {
private SearchHit createSearchHit(SearchContext context, FieldsVisitor fieldsVisitor, int docId, int subDocId,
LeafReaderContext subReaderContext) {
if (fieldsVisitor == null) {
return new SearchHit(docId);
}
@ -211,24 +215,39 @@ public class FetchPhase implements SearchPhase {
return searchHit;
}
private SearchHit createNestedSearchHit(SearchContext context, int nestedTopDocId, int nestedSubDocId, int rootSubDocId, Set<String> fieldNames, List<String> fieldNamePatterns, LeafReaderContext subReaderContext) throws IOException {
private SearchHit createNestedSearchHit(SearchContext context, int nestedTopDocId, int nestedSubDocId,
int rootSubDocId, Set<String> fieldNames,
List<String> fieldNamePatterns, LeafReaderContext subReaderContext) throws IOException {
// Also if highlighting is requested on nested documents we need to fetch the _source from the root document,
// otherwise highlighting will attempt to fetch the _source from the nested doc, which will fail,
// because the entire _source is only stored with the root document.
final FieldsVisitor rootFieldsVisitor = new FieldsVisitor(context.sourceRequested() || context.highlight() != null);
loadStoredFields(context, subReaderContext, rootFieldsVisitor, rootSubDocId);
rootFieldsVisitor.postProcess(context.mapperService());
final Uid uid;
final BytesReference source;
final boolean needSource = context.sourceRequested() || context.highlight() != null;
if (needSource || (context instanceof InnerHitsContext.InnerHitSubContext == false)) {
FieldsVisitor rootFieldsVisitor = new FieldsVisitor(needSource);
loadStoredFields(context, subReaderContext, rootFieldsVisitor, rootSubDocId);
rootFieldsVisitor.postProcess(context.mapperService());
uid = rootFieldsVisitor.uid();
source = rootFieldsVisitor.source();
} else {
// In case of nested inner hits we already know the uid, so no need to fetch it from stored fields again!
uid = ((InnerHitsContext.InnerHitSubContext) context).getUid();
source = null;
}
Map<String, DocumentField> searchFields = getSearchFields(context, nestedSubDocId, fieldNames, fieldNamePatterns, subReaderContext);
DocumentMapper documentMapper = context.mapperService().documentMapper(rootFieldsVisitor.uid().type());
Map<String, DocumentField> searchFields =
getSearchFields(context, nestedSubDocId, fieldNames, fieldNamePatterns, subReaderContext);
DocumentMapper documentMapper = context.mapperService().documentMapper(uid.type());
SourceLookup sourceLookup = context.lookup().source();
sourceLookup.setSegmentAndDocument(subReaderContext, nestedSubDocId);
ObjectMapper nestedObjectMapper = documentMapper.findNestedObjectMapper(nestedSubDocId, context, subReaderContext);
assert nestedObjectMapper != null;
SearchHit.NestedIdentity nestedIdentity = getInternalNestedIdentity(context, nestedSubDocId, subReaderContext, documentMapper, nestedObjectMapper);
SearchHit.NestedIdentity nestedIdentity =
getInternalNestedIdentity(context, nestedSubDocId, subReaderContext, documentMapper, nestedObjectMapper);
BytesReference source = rootFieldsVisitor.source();
if (source != null) {
Tuple<XContentType, Map<String, Object>> tuple = XContentHelper.convertToMap(source, true);
Map<String, Object> sourceAsMap = tuple.v2();
@ -248,7 +267,8 @@ public class FetchPhase implements SearchPhase {
// nested field has an array value in the _source
nestedParsedSource = (List<Map<String, Object>>) extractedValue;
} else if (extractedValue instanceof Map) {
// nested field has an object value in the _source. This just means the nested field has just one inner object, which is valid, but uncommon.
// nested field has an object value in the _source. This just means the nested field has just one inner object,
// which is valid, but uncommon.
nestedParsedSource = Collections.singletonList((Map<String, Object>) extractedValue);
} else {
throw new IllegalStateException("extracted source isn't an object or an array");
@ -268,15 +288,15 @@ public class FetchPhase implements SearchPhase {
context.lookup().source().setSource(nestedSource);
context.lookup().source().setSourceContentType(contentType);
}
return new SearchHit(nestedTopDocId, rootFieldsVisitor.uid().id(), documentMapper.typeText(), nestedIdentity, searchFields);
return new SearchHit(nestedTopDocId, uid.id(), documentMapper.typeText(), nestedIdentity, searchFields);
}
private Map<String, DocumentField> getSearchFields(SearchContext context, int nestedSubDocId, Set<String> fieldNames, List<String> fieldNamePatterns, LeafReaderContext subReaderContext) {
private Map<String, DocumentField> getSearchFields(SearchContext context, int nestedSubDocId, Set<String> fieldNames,
List<String> fieldNamePatterns, LeafReaderContext subReaderContext) {
Map<String, DocumentField> searchFields = null;
if (context.hasStoredFields() && !context.storedFieldsContext().fieldNames().isEmpty()) {
FieldsVisitor nestedFieldsVisitor = new CustomFieldsVisitor(fieldNames == null ? Collections.emptySet() : fieldNames,
fieldNamePatterns == null ? Collections.emptyList() : fieldNamePatterns, false);
fieldNamePatterns == null ? Collections.emptyList() : fieldNamePatterns, false);
if (nestedFieldsVisitor != null) {
loadStoredFields(context, subReaderContext, nestedFieldsVisitor, nestedSubDocId);
nestedFieldsVisitor.postProcess(context.mapperService());
@ -291,7 +311,9 @@ public class FetchPhase implements SearchPhase {
return searchFields;
}
private SearchHit.NestedIdentity getInternalNestedIdentity(SearchContext context, int nestedSubDocId, LeafReaderContext subReaderContext, DocumentMapper documentMapper, ObjectMapper nestedObjectMapper) throws IOException {
private SearchHit.NestedIdentity getInternalNestedIdentity(SearchContext context, int nestedSubDocId,
LeafReaderContext subReaderContext, DocumentMapper documentMapper,
ObjectMapper nestedObjectMapper) throws IOException {
int currentParent = nestedSubDocId;
ObjectMapper nestedParentObjectMapper;
ObjectMapper current = nestedObjectMapper;
@ -327,7 +349,8 @@ public class FetchPhase implements SearchPhase {
int offset = 0;
int nextParent = parentBits.nextSetBit(currentParent);
for (int docId = childIter.advance(currentParent + 1); docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS; docId = childIter.nextDoc()) {
for (int docId = childIter.advance(currentParent + 1); docId < nextParent && docId != DocIdSetIterator.NO_MORE_DOCS;
docId = childIter.nextDoc()) {
offset++;
}
currentParent = nextParent;

View File

@ -31,6 +31,7 @@ import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.internal.SubSearchContext;
@ -78,6 +79,9 @@ public final class InnerHitsContext {
protected final SearchContext context;
private InnerHitsContext childInnerHits;
// TODO: when types are complete removed just use String instead for the id:
private Uid uid;
protected InnerHitSubContext(String name, SearchContext context) {
super(context);
this.name = name;
@ -108,6 +112,13 @@ public final class InnerHitsContext {
return context;
}
public Uid getUid() {
return uid;
}
public void setUid(Uid uid) {
this.uid = uid;
}
}
public static void intersect(Weight weight, Weight innerHitQueryWeight, Collector collector, LeafReaderContext ctx) throws IOException {

View File

@ -22,6 +22,7 @@ package org.elasticsearch.search.fetch.subphase;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.fetch.FetchPhase;
@ -64,6 +65,7 @@ public final class InnerHitsFetchSubPhase implements FetchSubPhase {
docIdsToLoad[j] = topDoc.scoreDocs[j].doc;
}
innerHits.docIdsToLoad(docIdsToLoad, 0, docIdsToLoad.length);
innerHits.setUid(new Uid(hit.getType(), hit.getId()));
fetchPhase.execute(innerHits);
FetchSearchResult fetchResult = innerHits.fetchResult();
SearchHit[] internalHits = fetchResult.fetchResult().hits().getHits();