Nested inner hits shouldn't use relative paths

As in other places in the query DSL, the full field name should be used.
Before this change this wasn't the case for nested inner hits when source filtering was used.
Highlighting had a workaround, which is now removed because the source of nested inner hits can only be referred to by the full name.

Closes #16653
This commit is contained in:
Martijn van Groningen 2016-05-24 22:47:28 +02:00
parent 0b965167fd
commit 0e9f3addd2
8 changed files with 62 additions and 38 deletions

View File

@ -239,14 +239,18 @@ public class FetchPhase implements SearchPhase {
Tuple<XContentType, Map<String, Object>> tuple = XContentHelper.convertToMap(source, true);
Map<String, Object> sourceAsMap = tuple.v2();
List<Map<String, Object>> nestedParsedSource;
SearchHit.NestedIdentity nested = nestedIdentity;
do {
Object extractedValue = XContentMapValues.extractValue(nested.getField().string(), sourceAsMap);
if (extractedValue == null) {
// The nested objects may not exist in the _source, because it was filtered because of _source filtering
break;
} else if (extractedValue instanceof List) {
// Isolate the nested json array object that matches with nested hit and wrap it back into the same json
// structure with the nested json array object being the actual content. The latter is important, so that
// features like source filtering and highlighting work consistent regardless of whether the field points
// to a json object array for consistency reasons on how we refer to fields
Map<String, Object> nestedSourceAsMap = new HashMap<>();
Map<String, Object> current = nestedSourceAsMap;
for (SearchHit.NestedIdentity nested = nestedIdentity; nested != null; nested = nested.getChild()) {
String nestedPath = nested.getField().string();
current.put(nestedPath, new HashMap<>());
Object extractedValue = XContentMapValues.extractValue(nestedPath, sourceAsMap);
List<Map<String, Object>> nestedParsedSource;
if (extractedValue instanceof List) {
// nested field has an array value in the _source
nestedParsedSource = (List<Map<String, Object>>) extractedValue;
} else if (extractedValue instanceof Map) {
@ -256,18 +260,22 @@ public class FetchPhase implements SearchPhase {
throw new IllegalStateException("extracted source isn't an object or an array");
}
sourceAsMap = nestedParsedSource.get(nested.getOffset());
nested = nested.getChild();
} while (nested != null);
context.lookup().source().setSource(sourceAsMap);
if (nested.getChild() == null) {
current.put(nestedPath, sourceAsMap);
} else {
Map<String, Object> next = new HashMap<>();
current.put(nestedPath, next);
current = next;
}
}
context.lookup().source().setSource(nestedSourceAsMap);
XContentType contentType = tuple.v1();
BytesReference nestedSource = contentBuilder(contentType).map(sourceAsMap).bytes();
context.lookup().source().setSource(nestedSource);
context.lookup().source().setSourceContentType(contentType);
}
InternalSearchHit searchHit = new InternalSearchHit(nestedTopDocId, rootFieldsVisitor.uid().id(), documentMapper.typeText(), nestedIdentity, searchFields);
return searchHit;
return new InternalSearchHit(nestedTopDocId, rootFieldsVisitor.uid().id(), documentMapper.typeText(), nestedIdentity, searchFields);
}
private Map<String, SearchHitField> getSearchFields(SearchContext context, int nestedSubDocId, Set<String> fieldNames, List<String> fieldNamePatterns, LeafReaderContext subReaderContext) {

View File

@ -81,25 +81,6 @@ public interface FetchSubPhase {
return cache;
}
/**
 * Translates a full source path into the path to use against this hit's _source.
 * For a hit without a nested identity the path is returned unchanged. For a nested
 * hit the field names of the nested identity chain are concatenated, and that prefix
 * plus one separator character is stripped from the start of {@code sourcePath}.
 *
 * @param sourcePath the full path of the field
 * @return the path relative to the deepest nested object, or {@code sourcePath} itself
 *         when the hit is not nested
 */
public String getSourcePath(String sourcePath) {
    SearchHit.NestedIdentity identity = hit().getNestedIdentity();
    if (identity == null) {
        // Not a nested hit: the _source is the whole document, so the path is already correct.
        return sourcePath;
    }
    // The nested _source is isolated: the root and any parent objects are gone, so the
    // leading part of the path that addresses the nested object has to be dropped.
    StringBuilder prefix = new StringBuilder();
    while (identity != null) {
        prefix.append(identity.getField());
        identity = identity.getChild();
    }
    assert sourcePath.startsWith(prefix.toString());
    // Skip the collected prefix and the '.' that follows it.
    int relativeStart = prefix.length() + 1;
    return sourcePath.substring(relativeStart);
}
}
default Map<String, ? extends SearchParseElement> parseElements() {

View File

@ -58,7 +58,7 @@ public final class HighlightUtils {
} else {
SourceLookup sourceLookup = searchContext.lookup().source();
sourceLookup.setSegmentAndDocument(hitContext.readerContext(), hitContext.docId());
textsToHighlight = sourceLookup.extractRawValues(hitContext.getSourcePath(mapper.fieldType().name()));
textsToHighlight = sourceLookup.extractRawValues(mapper.fieldType().name());
}
assert textsToHighlight != null;
return textsToHighlight;

View File

@ -59,7 +59,7 @@ public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder
SourceLookup sourceLookup = searchContext.lookup().source();
sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);
List<Object> values = sourceLookup.extractRawValues(hitContext.getSourcePath(mapper.fieldType().name()));
List<Object> values = sourceLookup.extractRawValues(mapper.fieldType().name());
Field[] fields = new Field[values.size()];
for (int i = 0; i < values.size(); i++) {
fields[i] = new Field(mapper.fieldType().name(), values.get(i).toString(), TextField.TYPE_NOT_STORED);

View File

@ -55,7 +55,7 @@ public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder {
SourceLookup sourceLookup = searchContext.lookup().source();
sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);
List<Object> values = sourceLookup.extractRawValues(hitContext.getSourcePath(mapper.fieldType().name()));
List<Object> values = sourceLookup.extractRawValues(mapper.fieldType().name());
if (values.isEmpty()) {
return EMPTY_FIELDS;
}

View File

@ -26,6 +26,7 @@ import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.script.MockScriptEngine;
@ -853,7 +854,7 @@ public class TopHitsIT extends ESIntegTestCase {
nested("to-comments", "comments").subAggregation(
topHits("top-comments").size(1).highlighter(new HighlightBuilder().field(hlField)).explain(true)
.fieldDataField("comments.user")
.scriptField("script", new Script("5", ScriptService.ScriptType.INLINE, MockScriptEngine.NAME, Collections.emptyMap())).fetchSource("message", null)
.scriptField("script", new Script("5", ScriptService.ScriptType.INLINE, MockScriptEngine.NAME, Collections.emptyMap())).fetchSource("comments.message", null)
.version(true).sort("comments.date", SortOrder.ASC))).get();
assertHitCount(searchResponse, 2);
Nested nested = searchResponse.getAggregations().get("to-comments");
@ -888,7 +889,7 @@ public class TopHitsIT extends ESIntegTestCase {
assertThat(field.getValue().toString(), equalTo("5"));
assertThat(searchHit.sourceAsMap().size(), equalTo(1));
assertThat(searchHit.sourceAsMap().get("message").toString(), equalTo("some comment"));
assertThat(XContentMapValues.extractValue("comments.message", searchHit.sourceAsMap()), equalTo("some comment"));
}
public void testTopHitsInNested() throws Exception {

View File

@ -26,6 +26,7 @@ import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.InnerHitBuilder;
import org.elasticsearch.plugins.Plugin;
@ -34,6 +35,7 @@ import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.fetch.source.FetchSourceContext;
import org.elasticsearch.search.highlight.HighlightBuilder;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.SortBuilders;
@ -47,6 +49,7 @@ import java.util.List;
import java.util.Locale;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.common.xcontent.support.XContentMapValues.extractValue;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.constantScoreQuery;
import static org.elasticsearch.index.query.QueryBuilders.hasChildQuery;
@ -928,4 +931,32 @@ public class InnerHitsIT extends ESIntegTestCase {
assertHitCount(response, 1);
}
/**
 * Indexes one document with three nested comments, runs a nested query with an inner hit
 * that source-filters on the full field name {@code comments.message}, and checks that the
 * two matching inner hits expose their message under that same full path.
 */
public void testNestedSourceFiltering() throws Exception {
    assertAcked(prepareCreate("index1").addMapping("message", "comments", "type=nested"));

    XContentBuilder document = jsonBuilder().startObject()
        .field("message", "quick brown fox")
        .startArray("comments")
        .startObject().field("message", "fox eat quick").endObject()
        .startObject().field("message", "fox ate rabbit x y z").endObject()
        .startObject().field("message", "rabbit got away").endObject()
        .endArray()
        .endObject();
    client().prepareIndex("index1", "message", "1").setSource(document).get();
    refresh();

    // Source filtering uses the full field name (comments.message), the same name that is
    // used for this field in the query DSL and in aggregations, so that all features agree.
    SearchResponse response = client().prepareSearch()
        .setQuery(nestedQuery("comments", matchQuery("comments.message", "fox"), ScoreMode.None)
            .innerHit(new InnerHitBuilder().setFetchSourceContext(new FetchSourceContext("comments.message"))))
        .get();
    assertNoFailures(response);
    assertHitCount(response, 1);

    SearchHits commentHits = response.getHits().getAt(0).getInnerHits().get("comments");
    assertThat(commentHits.totalHits(), equalTo(2L));
    assertThat(extractValue("comments.message", commentHits.getAt(0).sourceAsMap()),
        equalTo("fox eat quick"));
    assertThat(extractValue("comments.message", commentHits.getAt(1).sourceAsMap()),
        equalTo("fox ate rabbit x y z"));
}
}

View File

@ -177,6 +177,9 @@ The `coerce` and `ignore_malformed` parameters were deprecated in favour of `val
`has_child` and `has_parent` queries. Use cases previously only possible with top level inner hits can now be done
with inner hits defined inside the query dsl.
* Source filtering for inner hits inside nested queries requires full field names instead of relative field names.
This is now consistent with source filtering in other places in the search API.
==== Query Profiler
In the response for profiling queries, the `query_type` has been renamed to `type` and `lucene` has been renamed to