Changes DocValueFieldsFetchSubPhase to reuse doc values iterators for multiple hits (#25644)

* Changes DocValueFieldsFetchSubPhase to reuse doc values iterators for multiple hits

Closes #24986

* iter

* Update ScriptDocValues to not reuse GeoPoint and Date objects

* added Javadoc about script value re-use
This commit is contained in:
Colin Goodheart-Smithe 2017-07-12 12:03:49 +00:00 committed by GitHub
parent 0a25558f98
commit 55a157e964
2 changed files with 43 additions and 19 deletions

View File

@ -43,8 +43,13 @@ import java.util.function.UnaryOperator;
/**
* Script level doc values, the assumption is that any implementation will implement a <code>getValue</code>
* and a <code>getValues</code> that return the relevant type that then can be used in scripts.
* Script level doc values, the assumption is that any implementation will
* implement a <code>getValue</code> and a <code>getValues</code> that return
* the relevant type that then can be used in scripts.
*
* Implementations should not internally re-use objects for the values that they
* return as a single {@link ScriptDocValues} instance can be reused to return
* values form multiple documents.
*/
public abstract class ScriptDocValues<T> extends AbstractList<T> {
/**
@ -266,7 +271,7 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
return;
}
for (int i = 0; i < count; i++) {
dates[i].setMillis(in.nextValue());
dates[i] = new MutableDateTime(in.nextValue(), DateTimeZone.UTC);
}
}
}
@ -340,7 +345,7 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
resize(in.docValueCount());
for (int i = 0; i < count; i++) {
GeoPoint point = in.nextValue();
values[i].reset(point.lat(), point.lon());
values[i] = new GeoPoint(point.lat(), point.lon());
}
} else {
resize(0);

View File

@ -18,15 +18,19 @@
*/
package org.elasticsearch.search.fetch.subphase;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
@ -38,7 +42,8 @@ import java.util.HashMap;
public final class DocValueFieldsFetchSubPhase implements FetchSubPhase {
@Override
public void hitExecute(SearchContext context, HitContext hitContext) throws IOException {
public void hitsExecute(SearchContext context, SearchHit[] hits) throws IOException {
if (context.collapse() != null) {
// retrieve the `doc_value` associated with the collapse field
String name = context.collapse().getFieldType().name();
@ -48,26 +53,40 @@ public final class DocValueFieldsFetchSubPhase implements FetchSubPhase {
context.docValueFieldsContext().fields().add(name);
}
}
if (context.docValueFieldsContext() == null) {
return;
}
hits = hits.clone(); // don't modify the incoming hits
Arrays.sort(hits, (a, b) -> Integer.compare(a.docId(), b.docId()));
for (String field : context.docValueFieldsContext().fields()) {
if (hitContext.hit().fieldsOrNull() == null) {
hitContext.hit().fields(new HashMap<>(2));
}
DocumentField hitField = hitContext.hit().getFields().get(field);
if (hitField == null) {
hitField = new DocumentField(field, new ArrayList<>(2));
hitContext.hit().getFields().put(field, hitField);
}
MappedFieldType fieldType = context.mapperService().fullName(field);
if (fieldType != null) {
/* Because this is called once per document we end up creating a new ScriptDocValues for every document which is important
* because the values inside ScriptDocValues might be reused for different documents (Dates do this). */
AtomicFieldData data = context.fieldData().getForField(fieldType).load(hitContext.readerContext());
ScriptDocValues<?> values = data.getScriptValues();
values.setNextDocId(hitContext.docId());
hitField.getValues().addAll(values);
LeafReaderContext subReaderContext = null;
AtomicFieldData data = null;
ScriptDocValues<?> values = null;
for (SearchHit hit : hits) {
// if the reader index has changed we need to get a new doc values reader instance
if (subReaderContext == null || hit.docId() >= subReaderContext.docBase + subReaderContext.reader().maxDoc()) {
int readerIndex = ReaderUtil.subIndex(hit.docId(), context.searcher().getIndexReader().leaves());
subReaderContext = context.searcher().getIndexReader().leaves().get(readerIndex);
data = context.fieldData().getForField(fieldType).load(subReaderContext);
values = data.getScriptValues();
}
int subDocId = hit.docId() - subReaderContext.docBase;
values.setNextDocId(subDocId);
if (hit.fieldsOrNull() == null) {
hit.fields(new HashMap<>(2));
}
DocumentField hitField = hit.getFields().get(field);
if (hitField == null) {
hitField = new DocumentField(field, new ArrayList<>(2));
hit.getFields().put(field, hitField);
}
hitField.getValues().addAll(values);
}
}
}
}