From 55a157e964ed0ba732c47f78f39c6bc53decebff Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Wed, 12 Jul 2017 12:03:49 +0000 Subject: [PATCH] Changes DocValueFieldsFetchSubPhase to reuse doc values iterators for multiple hits (#25644) * Changes DocValueFieldsFetchSubPhase to reuse doc values iterators for multiple hits Closes #24986 * iter * Update ScriptDocValues to not reuse GeoPoint and Date objects * added Javadoc about script value re-use --- .../index/fielddata/ScriptDocValues.java | 13 +++-- .../subphase/DocValueFieldsFetchSubPhase.java | 49 +++++++++++++------ 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java b/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java index 3ae4e6ebc81..803356b8f1e 100644 --- a/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java +++ b/core/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java @@ -43,8 +43,13 @@ import java.util.function.UnaryOperator; /** - * Script level doc values, the assumption is that any implementation will implement a getValue - * and a getValues that return the relevant type that then can be used in scripts. + * Script level doc values, the assumption is that any implementation will + * implement a getValue and a getValues that return + * the relevant type that then can be used in scripts. + * + * Implementations should not internally re-use objects for the values that they + * return as a single {@link ScriptDocValues} instance can be reused to return + * values from multiple documents. 
*/ public abstract class ScriptDocValues extends AbstractList { /** @@ -266,7 +271,7 @@ public abstract class ScriptDocValues extends AbstractList { return; } for (int i = 0; i < count; i++) { - dates[i].setMillis(in.nextValue()); + dates[i] = new MutableDateTime(in.nextValue(), DateTimeZone.UTC); } } } @@ -340,7 +345,7 @@ public abstract class ScriptDocValues extends AbstractList { resize(in.docValueCount()); for (int i = 0; i < count; i++) { GeoPoint point = in.nextValue(); - values[i].reset(point.lat(), point.lon()); + values[i] = new GeoPoint(point.lat(), point.lon()); } } else { resize(0); diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/DocValueFieldsFetchSubPhase.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/DocValueFieldsFetchSubPhase.java index cd0f1645ce0..a3e426c8c5c 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/DocValueFieldsFetchSubPhase.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/DocValueFieldsFetchSubPhase.java @@ -18,15 +18,19 @@ */ package org.elasticsearch.search.fetch.subphase; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.ReaderUtil; import org.elasticsearch.common.document.DocumentField; import org.elasticsearch.index.fielddata.AtomicFieldData; import org.elasticsearch.index.fielddata.ScriptDocValues; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -38,7 +42,8 @@ import java.util.HashMap; public final class DocValueFieldsFetchSubPhase implements FetchSubPhase { @Override - public void hitExecute(SearchContext context, HitContext hitContext) throws IOException { + public void hitsExecute(SearchContext context, 
SearchHit[] hits) throws IOException { + if (context.collapse() != null) { // retrieve the `doc_value` associated with the collapse field String name = context.collapse().getFieldType().name(); @@ -48,26 +53,40 @@ public final class DocValueFieldsFetchSubPhase implements FetchSubPhase { context.docValueFieldsContext().fields().add(name); } } + if (context.docValueFieldsContext() == null) { return; } + + hits = hits.clone(); // don't modify the incoming hits + Arrays.sort(hits, (a, b) -> Integer.compare(a.docId(), b.docId())); + for (String field : context.docValueFieldsContext().fields()) { - if (hitContext.hit().fieldsOrNull() == null) { - hitContext.hit().fields(new HashMap<>(2)); - } - DocumentField hitField = hitContext.hit().getFields().get(field); - if (hitField == null) { - hitField = new DocumentField(field, new ArrayList<>(2)); - hitContext.hit().getFields().put(field, hitField); - } MappedFieldType fieldType = context.mapperService().fullName(field); if (fieldType != null) { - /* Because this is called once per document we end up creating a new ScriptDocValues for every document which is important - * because the values inside ScriptDocValues might be reused for different documents (Dates do this). 
*/ - AtomicFieldData data = context.fieldData().getForField(fieldType).load(hitContext.readerContext()); - ScriptDocValues values = data.getScriptValues(); - values.setNextDocId(hitContext.docId()); - hitField.getValues().addAll(values); + LeafReaderContext subReaderContext = null; + AtomicFieldData data = null; + ScriptDocValues values = null; + for (SearchHit hit : hits) { + // if the reader index has changed we need to get a new doc values reader instance + if (subReaderContext == null || hit.docId() >= subReaderContext.docBase + subReaderContext.reader().maxDoc()) { + int readerIndex = ReaderUtil.subIndex(hit.docId(), context.searcher().getIndexReader().leaves()); + subReaderContext = context.searcher().getIndexReader().leaves().get(readerIndex); + data = context.fieldData().getForField(fieldType).load(subReaderContext); + values = data.getScriptValues(); + } + int subDocId = hit.docId() - subReaderContext.docBase; + values.setNextDocId(subDocId); + if (hit.fieldsOrNull() == null) { + hit.fields(new HashMap<>(2)); + } + DocumentField hitField = hit.getFields().get(field); + if (hitField == null) { + hitField = new DocumentField(field, new ArrayList<>(2)); + hit.getFields().put(field, hitField); + } + hitField.getValues().addAll(values); + } } } }