Merge pull request #15017 from jimferenczi/fields_option

Refuse to load fields from _source when using the `fields` option and support wildcards.
This commit is contained in:
Jim Ferenczi 2015-11-30 11:01:21 +01:00
commit e182072b6f
9 changed files with 95 additions and 149 deletions

View File

@ -1,37 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fieldvisitor;
import org.apache.lucene.index.FieldInfo;
import java.io.IOException;
/**
 * A {@link FieldsVisitor} that accepts every field it is asked about.
 * <p>
 * The superclass is always constructed with {@code true}; the sibling
 * {@code CustomFieldsVisitor} forwards its {@code loadSource} argument to the
 * same constructor, so this presumably enables source loading as well.
 */
public class AllFieldsVisitor extends FieldsVisitor {
/**
 * Creates the visitor, passing {@code true} to the {@link FieldsVisitor} constructor.
 */
public AllFieldsVisitor() {
super(true);
}
/**
 * Accepts the given field unconditionally.
 *
 * @param fieldInfo the field being considered; it is not inspected
 * @return always {@link Status#YES}
 * @throws IOException declared by the overridden method; this implementation does not throw it
 */
@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
return Status.YES;
}
}

View File

@ -19,22 +19,32 @@
package org.elasticsearch.index.fieldvisitor;
import org.apache.lucene.index.FieldInfo;
import org.elasticsearch.common.regex.Regex;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Set;
/**
* A field visitor that allows to load a selection of the stored fields.
* A field visitor that allows loading a selection of the stored fields by exact name or by pattern.
* Supported pattern styles: "xxx*", "*xxx", "*xxx*" and "xxx*yyy".
* The Uid field is always loaded.
* The class is optimized for source loading as it is a common use case.
*/
public class CustomFieldsVisitor extends FieldsVisitor {
private final Set<String> fields;
private final List<String> patterns;
public CustomFieldsVisitor(Set<String> fields, boolean loadSource) {
public CustomFieldsVisitor(Set<String> fields, List<String> patterns, boolean loadSource) {
super(loadSource);
this.fields = fields;
this.patterns = patterns;
}
public CustomFieldsVisitor(Set<String> fields, boolean loadSource) {
this(fields, Collections.emptyList(), loadSource);
}
@Override
@ -42,7 +52,14 @@ public class CustomFieldsVisitor extends FieldsVisitor {
if (super.needsField(fieldInfo) == Status.YES) {
return Status.YES;
}
return fields.contains(fieldInfo.name) ? Status.YES : Status.NO;
if (fields.contains(fieldInfo.name)) {
return Status.YES;
}
for (String pattern : patterns) {
if (Regex.simpleMatch(pattern, fieldInfo.name)) {
return Status.YES;
}
}
return Status.NO;
}
}

View File

@ -30,12 +30,12 @@ import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.text.StringAndBytesText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.fieldvisitor.AllFieldsVisitor;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
import org.elasticsearch.index.mapper.DocumentMapper;
@ -55,13 +55,7 @@ import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SourceLookup;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;
import static java.util.Collections.unmodifiableMap;
import static org.elasticsearch.common.xcontent.XContentFactory.contentBuilder;
@ -98,9 +92,7 @@ public class FetchPhase implements SearchPhase {
public void execute(SearchContext context) {
FieldsVisitor fieldsVisitor;
Set<String> fieldNames = null;
List<String> extractFieldNames = null;
boolean loadAllStored = false;
List<String> fieldNamePatterns = null;
if (!context.hasFieldNames()) {
// no fields specified, default to return source if no explicit indication
if (!context.hasScriptFields() && !context.hasFetchSourceContext()) {
@ -111,10 +103,6 @@ public class FetchPhase implements SearchPhase {
fieldsVisitor = new FieldsVisitor(context.sourceRequested());
} else {
for (String fieldName : context.fieldNames()) {
if (fieldName.equals("*")) {
loadAllStored = true;
continue;
}
if (fieldName.equals(SourceFieldMapper.NAME)) {
if (context.hasFetchSourceContext()) {
context.fetchSourceContext().fetchSource(true);
@ -123,32 +111,28 @@ public class FetchPhase implements SearchPhase {
}
continue;
}
if (Regex.isSimpleMatchPattern(fieldName)) {
if (fieldNamePatterns == null) {
fieldNamePatterns = new ArrayList<>();
}
fieldNamePatterns.add(fieldName);
} else {
MappedFieldType fieldType = context.smartNameFieldType(fieldName);
if (fieldType == null) {
// Only fail if we know it is an object field; missing paths / fields shouldn't fail.
if (context.getObjectMapper(fieldName) != null) {
throw new IllegalArgumentException("field [" + fieldName + "] isn't a leaf field");
}
} else if (fieldType.stored()) {
}
if (fieldNames == null) {
fieldNames = new HashSet<>();
}
fieldNames.add(fieldType.names().indexName());
} else {
if (extractFieldNames == null) {
extractFieldNames = new ArrayList<>();
}
extractFieldNames.add(fieldName);
fieldNames.add(fieldName);
}
}
if (loadAllStored) {
fieldsVisitor = new AllFieldsVisitor(); // load everything, including _source
} else if (fieldNames != null) {
boolean loadSource = extractFieldNames != null || context.sourceRequested();
fieldsVisitor = new CustomFieldsVisitor(fieldNames, loadSource);
} else {
fieldsVisitor = new FieldsVisitor(extractFieldNames != null || context.sourceRequested());
}
boolean loadSource = context.sourceRequested();
fieldsVisitor = new CustomFieldsVisitor(fieldNames == null ? Collections.emptySet() : fieldNames,
fieldNamePatterns == null ? Collections.emptyList() : fieldNamePatterns, loadSource);
}
InternalSearchHit[] hits = new InternalSearchHit[context.docIdsToLoadSize()];
@ -163,9 +147,9 @@ public class FetchPhase implements SearchPhase {
try {
int rootDocId = findRootDocumentIfNested(context, subReaderContext, subDocId);
if (rootDocId != -1) {
searchHit = createNestedSearchHit(context, docId, subDocId, rootDocId, extractFieldNames, loadAllStored, fieldNames, subReaderContext);
searchHit = createNestedSearchHit(context, docId, subDocId, rootDocId, fieldNames, fieldNamePatterns, subReaderContext);
} else {
searchHit = createSearchHit(context, fieldsVisitor, docId, subDocId, extractFieldNames, subReaderContext);
searchHit = createSearchHit(context, fieldsVisitor, docId, subDocId, subReaderContext);
}
} catch (IOException e) {
throw ExceptionsHelper.convertToElastic(e);
@ -199,7 +183,7 @@ public class FetchPhase implements SearchPhase {
return -1;
}
private InternalSearchHit createSearchHit(SearchContext context, FieldsVisitor fieldsVisitor, int docId, int subDocId, List<String> extractFieldNames, LeafReaderContext subReaderContext) {
private InternalSearchHit createSearchHit(SearchContext context, FieldsVisitor fieldsVisitor, int docId, int subDocId, LeafReaderContext subReaderContext) {
loadStoredFields(context, subReaderContext, fieldsVisitor, subDocId);
fieldsVisitor.postProcess(context.mapperService());
@ -219,45 +203,24 @@ public class FetchPhase implements SearchPhase {
typeText = documentMapper.typeText();
}
InternalSearchHit searchHit = new InternalSearchHit(docId, fieldsVisitor.uid().id(), typeText, searchFields);
// go over and extract fields that are not mapped / stored
// Set _source if requested.
SourceLookup sourceLookup = context.lookup().source();
sourceLookup.setSegmentAndDocument(subReaderContext, subDocId);
if (fieldsVisitor.source() != null) {
sourceLookup.setSource(fieldsVisitor.source());
}
if (extractFieldNames != null) {
for (String extractFieldName : extractFieldNames) {
List<Object> values = context.lookup().source().extractRawValues(extractFieldName);
if (!values.isEmpty()) {
if (searchHit.fieldsOrNull() == null) {
searchHit.fields(new HashMap<String, SearchHitField>(2));
}
SearchHitField hitField = searchHit.fields().get(extractFieldName);
if (hitField == null) {
hitField = new InternalSearchHitField(extractFieldName, new ArrayList<>(2));
searchHit.fields().put(extractFieldName, hitField);
}
for (Object value : values) {
hitField.values().add(value);
}
}
}
}
return searchHit;
}
private InternalSearchHit createNestedSearchHit(SearchContext context, int nestedTopDocId, int nestedSubDocId, int rootSubDocId, List<String> extractFieldNames, boolean loadAllStored, Set<String> fieldNames, LeafReaderContext subReaderContext) throws IOException {
private InternalSearchHit createNestedSearchHit(SearchContext context, int nestedTopDocId, int nestedSubDocId, int rootSubDocId, Set<String> fieldNames, List<String> fieldNamePatterns, LeafReaderContext subReaderContext) throws IOException {
// Also if highlighting is requested on nested documents we need to fetch the _source from the root document,
// otherwise highlighting will attempt to fetch the _source from the nested doc, which will fail,
// because the entire _source is only stored with the root document.
final FieldsVisitor rootFieldsVisitor = new FieldsVisitor(context.sourceRequested() || extractFieldNames != null || context.highlight() != null);
final FieldsVisitor rootFieldsVisitor = new FieldsVisitor(context.sourceRequested() || context.highlight() != null);
loadStoredFields(context, subReaderContext, rootFieldsVisitor, rootSubDocId);
rootFieldsVisitor.postProcess(context.mapperService());
Map<String, SearchHitField> searchFields = getSearchFields(context, nestedSubDocId, loadAllStored, fieldNames, subReaderContext);
Map<String, SearchHitField> searchFields = getSearchFields(context, nestedSubDocId, fieldNames, fieldNamePatterns, subReaderContext);
DocumentMapper documentMapper = context.mapperService().documentMapper(rootFieldsVisitor.uid().type());
SourceLookup sourceLookup = context.lookup().source();
sourceLookup.setSegmentAndDocument(subReaderContext, nestedSubDocId);
@ -299,39 +262,14 @@ public class FetchPhase implements SearchPhase {
}
InternalSearchHit searchHit = new InternalSearchHit(nestedTopDocId, rootFieldsVisitor.uid().id(), documentMapper.typeText(), nestedIdentity, searchFields);
if (extractFieldNames != null) {
for (String extractFieldName : extractFieldNames) {
List<Object> values = context.lookup().source().extractRawValues(extractFieldName);
if (!values.isEmpty()) {
if (searchHit.fieldsOrNull() == null) {
searchHit.fields(new HashMap<String, SearchHitField>(2));
}
SearchHitField hitField = searchHit.fields().get(extractFieldName);
if (hitField == null) {
hitField = new InternalSearchHitField(extractFieldName, new ArrayList<>(2));
searchHit.fields().put(extractFieldName, hitField);
}
for (Object value : values) {
hitField.values().add(value);
}
}
}
}
return searchHit;
}
private Map<String, SearchHitField> getSearchFields(SearchContext context, int nestedSubDocId, boolean loadAllStored, Set<String> fieldNames, LeafReaderContext subReaderContext) {
private Map<String, SearchHitField> getSearchFields(SearchContext context, int nestedSubDocId, Set<String> fieldNames, List<String> fieldNamePatterns, LeafReaderContext subReaderContext) {
Map<String, SearchHitField> searchFields = null;
if (context.hasFieldNames() && !context.fieldNames().isEmpty()) {
FieldsVisitor nestedFieldsVisitor = null;
if (loadAllStored) {
nestedFieldsVisitor = new AllFieldsVisitor();
} else if (fieldNames != null) {
nestedFieldsVisitor = new CustomFieldsVisitor(fieldNames, false);
}
FieldsVisitor nestedFieldsVisitor = new CustomFieldsVisitor(fieldNames == null ? Collections.emptySet() : fieldNames,
fieldNamePatterns == null ? Collections.emptyList() : fieldNamePatterns, false);
if (nestedFieldsVisitor != null) {
loadStoredFields(context, subReaderContext, nestedFieldsVisitor, nestedSubDocId);
nestedFieldsVisitor.postProcess(context.mapperService());

View File

@ -118,7 +118,8 @@ public class SimpleIndexTemplateIT extends ESIntegTestCase {
assertHitCount(searchResponse, 1);
assertThat(searchResponse.getHits().getAt(0).field("field1").value().toString(), equalTo("value1"));
assertThat(searchResponse.getHits().getAt(0).field("field2").value().toString(), equalTo("value 2")); // this will still be loaded because of the source feature
// field2 is not stored.
assertThat(searchResponse.getHits().getAt(0).field("field2"), nullValue());
client().prepareIndex("text_index", "type1", "1").setSource("field1", "value1", "field2", "value 2").setRefresh(true).execute().actionGet();

View File

@ -154,7 +154,7 @@ public abstract class AbstractGeoTestCase extends ESIntegTestCase {
.endObject()));
}
assertAcked(prepareCreate(HIGH_CARD_IDX_NAME).setSettings(Settings.builder().put("number_of_shards", 2))
.addMapping("type", SINGLE_VALUED_FIELD_NAME, "type=geo_point", MULTI_VALUED_FIELD_NAME, "type=geo_point", NUMBER_FIELD_NAME, "type=long", "tag", "type=string,index=not_analyzed"));
.addMapping("type", SINGLE_VALUED_FIELD_NAME, "type=geo_point", MULTI_VALUED_FIELD_NAME, "type=geo_point", NUMBER_FIELD_NAME, "type=long,store=true", "tag", "type=string,index=not_analyzed"));
for (int i = 0; i < 2000; i++) {
singleVal = singleValues[i % numUniqueGeoPoints];
@ -196,8 +196,8 @@ public abstract class AbstractGeoTestCase extends ESIntegTestCase {
SearchHitField hitField = searchHit.field(NUMBER_FIELD_NAME);
assertThat("Hit " + i + " has wrong number of values", hitField.getValues().size(), equalTo(1));
Integer value = hitField.getValue();
assertThat("Hit " + i + " has wrong value", value, equalTo(i));
Long value = hitField.getValue();
assertThat("Hit " + i + " has wrong value", value.intValue(), equalTo(i));
}
assertThat(totalHits, equalTo(2000l));
}

View File

@ -532,8 +532,8 @@ public class TopHitsIT extends ESIntegTestCase {
topHits("hits").setSize(1)
.highlighter(new HighlightBuilder().field("text"))
.setExplain(true)
.addFieldDataField("field1")
.addField("text")
.addFieldDataField("field1")
.addScriptField("script", new Script("5", ScriptService.ScriptType.INLINE, MockScriptEngine.NAME, Collections.emptyMap()))
.setFetchSource("text", null)
.setVersion(true)
@ -569,8 +569,7 @@ public class TopHitsIT extends ESIntegTestCase {
SearchHitField field = hit.field("field1");
assertThat(field.getValue().toString(), equalTo("5"));
field = hit.field("text");
assertThat(field.getValue().toString(), equalTo("some text to entertain"));
assertThat(hit.getSource().get("text").toString(), equalTo("some text to entertain"));
field = hit.field("script");
assertThat(field.getValue().toString(), equalTo("5"));

View File

@ -468,3 +468,7 @@ response is output by default.
Finally, the API for org.elasticsearch.monitor.os.OsStats has changed. The `getLoadAverage` method has been removed. The
value for this can now be obtained from `OsStats.Cpu#getLoadAverage`. Additionally, the recent CPU usage can be obtained
from `OsStats.Cpu#getPercent`.
=== Fields option
Only stored fields are retrievable with this option.
The `fields` option can no longer load non-stored fields from `_source`.

View File

@ -108,12 +108,12 @@ public class SearchFieldsTests extends ESIntegTestCase {
assertThat(searchResponse.getHits().getAt(0).fields().size(), equalTo(1));
assertThat(searchResponse.getHits().getAt(0).fields().get("field1").value().toString(), equalTo("value1"));
// field2 is not stored, check that it gets extracted from source
// field2 is not stored, check that it is not extracted from source.
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).addField("field2").execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
assertThat(searchResponse.getHits().hits().length, equalTo(1));
assertThat(searchResponse.getHits().getAt(0).fields().size(), equalTo(1));
assertThat(searchResponse.getHits().getAt(0).fields().get("field2").value().toString(), equalTo("value2"));
assertThat(searchResponse.getHits().getAt(0).fields().size(), equalTo(0));
assertThat(searchResponse.getHits().getAt(0).fields().get("field2"), nullValue());
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).addField("field3").execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
@ -121,6 +121,34 @@ public class SearchFieldsTests extends ESIntegTestCase {
assertThat(searchResponse.getHits().getAt(0).fields().size(), equalTo(1));
assertThat(searchResponse.getHits().getAt(0).fields().get("field3").value().toString(), equalTo("value3"));
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).addField("*3").execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
assertThat(searchResponse.getHits().hits().length, equalTo(1));
assertThat(searchResponse.getHits().getAt(0).fields().size(), equalTo(1));
assertThat(searchResponse.getHits().getAt(0).fields().get("field3").value().toString(), equalTo("value3"));
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).addField("*3").addField("field1").addField("field2").execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
assertThat(searchResponse.getHits().hits().length, equalTo(1));
assertThat(searchResponse.getHits().getAt(0).fields().size(), equalTo(2));
assertThat(searchResponse.getHits().getAt(0).fields().get("field3").value().toString(), equalTo("value3"));
assertThat(searchResponse.getHits().getAt(0).fields().get("field1").value().toString(), equalTo("value1"));
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).addField("field*").execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
assertThat(searchResponse.getHits().hits().length, equalTo(1));
assertThat(searchResponse.getHits().getAt(0).fields().size(), equalTo(2));
assertThat(searchResponse.getHits().getAt(0).fields().get("field3").value().toString(), equalTo("value3"));
assertThat(searchResponse.getHits().getAt(0).fields().get("field1").value().toString(), equalTo("value1"));
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).addField("f*3").execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
assertThat(searchResponse.getHits().hits().length, equalTo(1));
assertThat(searchResponse.getHits().getAt(0).fields().size(), equalTo(1));
assertThat(searchResponse.getHits().getAt(0).fields().get("field3").value().toString(), equalTo("value3"));
searchResponse = client().prepareSearch().setQuery(matchAllQuery()).addField("*").execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1l));
assertThat(searchResponse.getHits().hits().length, equalTo(1));
@ -439,8 +467,7 @@ public class SearchFieldsTests extends ESIntegTestCase {
.get();
assertThat(searchResponse.getHits().totalHits(), equalTo(1l));
assertThat(searchResponse.getHits().getAt(0).field("field1").isMetadataField(), equalTo(false));
assertThat(searchResponse.getHits().getAt(0).field("field1").getValue().toString(), equalTo("value"));
assertThat(searchResponse.getHits().getAt(0).field("field1"), nullValue());
assertThat(searchResponse.getHits().getAt(0).field("_routing").isMetadataField(), equalTo(true));
assertThat(searchResponse.getHits().getAt(0).field("_routing").getValue().toString(), equalTo("1"));
}
@ -647,8 +674,7 @@ public class SearchFieldsTests extends ESIntegTestCase {
Map<String, SearchHitField> fields = response.getHits().getAt(0).getFields();
assertThat(fields.get("field1").isMetadataField(), equalTo(false));
assertThat(fields.get("field1").getValue().toString(), equalTo("value"));
assertThat(fields.get("field1"), nullValue());
assertThat(fields.get("_routing").isMetadataField(), equalTo(true));
assertThat(fields.get("_routing").getValue().toString(), equalTo("1"));
assertThat(fields.get("_timestamp").isMetadataField(), equalTo(true));

View File

@ -79,7 +79,6 @@
body:
fields: [ include.field2 ]
query: { match_all: {} }
- match: { hits.hits.0.fields: { include.field2 : [v2] }}
- is_false: hits.hits.0._source
- do:
@ -87,7 +86,7 @@
body:
fields: [ include.field2, _source ]
query: { match_all: {} }
- match: { hits.hits.0.fields: { include.field2 : [v2] }}
- match: { hits.hits.0._source.include.field2: v2 }
- is_true: hits.hits.0._source
@ -95,4 +94,3 @@
search:
fielddata_fields: [ "count" ]
- match: { hits.hits.0.fields.count: [1] }