From b9631442542e15784d1db8f7f88bf4e02179c8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Wed, 15 Feb 2017 16:52:17 +0100 Subject: [PATCH] Add xcontent parsing to completion suggestion option (#23071) This adds parsing from xContent to the CompletionSuggestion.Entry.Option. The completion suggestion option also inlines the xContent rendering of the contained SearchHit, so in order to reuse the SearchHit parser this also changes the way SearchHit is parsed from using a loop-based parser to using a ConstructingObjectParser that creates an intermediate map representation and then later uses this output to create either a single SearchHit or use it with additional fields defined in the parser for the completion suggestion option. --- .../org/elasticsearch/search/SearchHit.java | 309 ++++++++++-------- .../completion/CompletionSuggestion.java | 72 +++- .../CompletionSuggestionOptionTests.java | 98 ++++++ 3 files changed, 337 insertions(+), 142 deletions(-) create mode 100644 core/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestionOptionTests.java diff --git a/core/src/main/java/org/elasticsearch/search/SearchHit.java b/core/src/main/java/org/elasticsearch/search/SearchHit.java index cccf8dbed98..05558fd6f09 100644 --- a/core/src/main/java/org/elasticsearch/search/SearchHit.java +++ b/core/src/main/java/org/elasticsearch/search/SearchHit.java @@ -33,6 +33,8 @@ import org.elasticsearch.common.io.stream.Streamable; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser.ValueType; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -43,11 +45,11 @@ import 
org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; import org.elasticsearch.search.lookup.SourceLookup; +import org.elasticsearch.search.suggest.completion.CompletionSuggestion; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -64,7 +66,6 @@ import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optiona import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import static org.elasticsearch.common.xcontent.XContentParserUtils.parseStoredFieldsValue; import static org.elasticsearch.common.xcontent.XContentParserUtils.throwUnknownField; -import static org.elasticsearch.common.xcontent.XContentParserUtils.throwUnknownToken; import static org.elasticsearch.search.fetch.subphase.highlight.HighlightField.readHighlightField; /** @@ -369,6 +370,14 @@ public final class SearchHit implements Streamable, ToXContentObject, Iterable metaFields = new ArrayList<>(); @@ -462,121 +471,78 @@ public final class SearchHit implements Streamable, ToXContentObject, Iterable highlightFields = new HashMap<>(); - BytesReference parsedSource = null; - List matchedQueries = new ArrayList<>(); - Map fields = new HashMap<>(); - Explanation explanation = null; - ShardId shardId = null; - String nodeId = null; - Map innerHits = null; - while((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - currentFieldName = parser.currentName(); - } else if (token.isValue()) { - if (Fields._TYPE.equals(currentFieldName)) { - type = parser.text(); - } else if (Fields._INDEX.equals(currentFieldName)) { - index = parser.text(); - } else if (Fields._ID.equals(currentFieldName)) { - id = parser.text(); - } else if 
(Fields._SCORE.equals(currentFieldName)) { - score = parser.floatValue(); - } else if (Fields._VERSION.equals(currentFieldName)) { - version = parser.longValue(); - } else if (Fields._SHARD.equals(currentFieldName)) { - shardId = ShardId.fromString(parser.text()); - } else if (Fields._NODE.equals(currentFieldName)) { - nodeId = parser.text(); - } else if (MapperService.isMetadataField(currentFieldName)) { - List values = new ArrayList<>(); - values.add(parseStoredFieldsValue(parser)); - fields.put(currentFieldName, new SearchHitField(currentFieldName, values)); - } else { - throwUnknownField(currentFieldName, parser.getTokenLocation()); - } - } else if (token == XContentParser.Token.VALUE_NULL) { - if (Fields._SCORE.equals(currentFieldName)) { - score = Float.NaN; - } else { - throwUnknownField(currentFieldName, parser.getTokenLocation()); - } - } else if (token == XContentParser.Token.START_OBJECT) { - if (SourceFieldMapper.NAME.equals(currentFieldName)) { - try (XContentBuilder builder = XContentBuilder.builder(parser.contentType().xContent())) { - //the original document gets slightly modified: whitespaces or pretty printing are not preserved, - //it all depends on the current builder settings - builder.copyCurrentStructure(parser); - parsedSource = builder.bytes(); - } - } else if (Fields.HIGHLIGHT.equals(currentFieldName)) { - while((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - HighlightField highlightField = HighlightField.fromXContent(parser); - highlightFields.put(highlightField.getName(), highlightField); - } - } else if (Fields.FIELDS.equals(currentFieldName)) { - while((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - String fieldName = parser.currentName(); - List values = new ArrayList<>(); - ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.nextToken(), parser::getTokenLocation); - while((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - values.add(parseStoredFieldsValue(parser)); - 
} - fields.put(fieldName, new SearchHitField(fieldName, values)); - } - } else if (Fields._EXPLANATION.equals(currentFieldName)) { - explanation = parseExplanation(parser); - } else if (Fields.INNER_HITS.equals(currentFieldName)) { - innerHits = new HashMap<>(); - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - // parse the key - ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser::getTokenLocation); - String name = parser.currentName(); - innerHits.put(name, SearchHits.fromXContent(parser)); - parser.nextToken(); - ensureExpectedToken(XContentParser.Token.END_OBJECT, parser.currentToken(), parser::getTokenLocation); - } - } else if (NestedIdentity.Fields._NESTED.equals(currentFieldName)) { - nestedIdentity = NestedIdentity.fromXContent(parser); - } else { - throwUnknownField(currentFieldName, parser.getTokenLocation()); - } - } else if (token == XContentParser.Token.START_ARRAY) { - if (Fields.SORT.equals(currentFieldName)) { - sortValues = SearchSortValues.fromXContent(parser); - } else if (Fields.MATCHED_QUERIES.equals(currentFieldName)) { - while((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - matchedQueries.add(parser.text()); - } - } else { - throwUnknownField(currentFieldName, parser.getTokenLocation()); - } - } else { - throwUnknownToken(token, parser.getTokenLocation()); - } - } - SearchHit searchHit = new SearchHit(-1, id, new Text(type), nestedIdentity, Collections.emptyMap()); - searchHit.index = index; - searchHit.score(score); - searchHit.version(version); - searchHit.sortValues(sortValues); - searchHit.highlightFields(highlightFields); - searchHit.sourceRef(parsedSource); - searchHit.explanation(explanation); - searchHit.setInnerHits(innerHits); - if (matchedQueries.size() > 0) { + /** + * This parser outputs a temporary map of the objects needed to create the + * SearchHit instead of directly creating the SearchHit. 
The reason for this + * is that this way we can reuse the parser when parsing xContent from + * {@link CompletionSuggestion.Entry.Option} which unfortunately inlines the + * output of + * {@link #toInnerXContent(XContentBuilder, org.elasticsearch.common.xcontent.ToXContent.Params)} + * of the included search hit. The output of the map is used to create the + * actual SearchHit instance via {@link #createFromMap(Map)} + */ + private static ObjectParser, Void> MAP_PARSER = new ObjectParser<>("innerHitsParser", HashMap::new); + + static { + declareInnerHitsParseFields(MAP_PARSER); + } + + public static SearchHit fromXContent(XContentParser parser) { + return createFromMap(MAP_PARSER.apply(parser, null)); + } + + public static void declareInnerHitsParseFields(ObjectParser, Void> parser) { + declareMetaDataFields(parser); + parser.declareString((map, value) -> map.put(Fields._TYPE, value), new ParseField(Fields._TYPE)); + parser.declareString((map, value) -> map.put(Fields._INDEX, value), new ParseField(Fields._INDEX)); + parser.declareString((map, value) -> map.put(Fields._ID, value), new ParseField(Fields._ID)); + parser.declareString((map, value) -> map.put(Fields._NODE, value), new ParseField(Fields._NODE)); + parser.declareField((map, value) -> map.put(Fields._SCORE, value), SearchHit::parseScore, new ParseField(Fields._SCORE), + ValueType.FLOAT_OR_NULL); + parser.declareLong((map, value) -> map.put(Fields._VERSION, value), new ParseField(Fields._VERSION)); + parser.declareField((map, value) -> map.put(Fields._SHARD, value), (p, c) -> ShardId.fromString(p.text()), + new ParseField(Fields._SHARD), ValueType.STRING); + parser.declareObject((map, value) -> map.put(SourceFieldMapper.NAME, value), (p, c) -> parseSourceBytes(p), + new ParseField(SourceFieldMapper.NAME)); + parser.declareObject((map, value) -> map.put(Fields.HIGHLIGHT, value), (p, c) -> parseHighlightFields(p), + new ParseField(Fields.HIGHLIGHT)); + parser.declareObject((map, value) -> { + Map fieldMap = 
get(Fields.FIELDS, map, new HashMap()); + fieldMap.putAll(value); + map.put(Fields.FIELDS, fieldMap); + }, (p, c) -> parseFields(p), new ParseField(Fields.FIELDS)); + parser.declareObject((map, value) -> map.put(Fields._EXPLANATION, value), (p, c) -> parseExplanation(p), + new ParseField(Fields._EXPLANATION)); + parser.declareObject((map, value) -> map.put(NestedIdentity._NESTED, value), NestedIdentity::fromXContent, + new ParseField(NestedIdentity._NESTED)); + parser.declareObject((map, value) -> map.put(Fields.INNER_HITS, value), (p,c) -> parseInnerHits(p), + new ParseField(Fields.INNER_HITS)); + parser.declareStringArray((map, list) -> map.put(Fields.MATCHED_QUERIES, list), new ParseField(Fields.MATCHED_QUERIES)); + parser.declareField((map, list) -> map.put(Fields.SORT, list), SearchSortValues::fromXContent, new ParseField(Fields.SORT), + ValueType.OBJECT_ARRAY); + } + + public static SearchHit createFromMap(Map values) { + String id = get(Fields._ID, values, null); + String type = get(Fields._TYPE, values, null); + NestedIdentity nestedIdentity = get(NestedIdentity._NESTED, values, null); + Map fields = get(Fields.FIELDS, values, null); + + SearchHit searchHit = new SearchHit(-1, id, new Text(type), nestedIdentity, fields); + searchHit.index = get(Fields._INDEX, values, null); + searchHit.score(get(Fields._SCORE, values, DEFAULT_SCORE)); + searchHit.version(get(Fields._VERSION, values, -1L)); + searchHit.sortValues(get(Fields.SORT, values, SearchSortValues.EMPTY)); + searchHit.highlightFields(get(Fields.HIGHLIGHT, values, null)); + searchHit.sourceRef(get(SourceFieldMapper.NAME, values, null)); + searchHit.explanation(get(Fields._EXPLANATION, values, null)); + searchHit.setInnerHits(get(Fields.INNER_HITS, values, null)); + List matchedQueries = get(Fields.MATCHED_QUERIES, values, null); + if (matchedQueries != null) { searchHit.matchedQueries(matchedQueries.toArray(new String[matchedQueries.size()])); } + ShardId shardId = get(Fields._SHARD, values, null); + 
String nodeId = get(Fields._NODE, values, null); if (shardId != null && nodeId != null) { searchHit.shard(new SearchShardTarget(nodeId, shardId)); } @@ -584,6 +550,84 @@ public final class SearchHit implements Streamable, ToXContentObject, Iterable T get(String key, Map map, T defaultValue) { + return (T) map.getOrDefault(key, defaultValue); + } + + private static float parseScore(XContentParser parser) throws IOException { + if (parser.currentToken() == XContentParser.Token.VALUE_NUMBER || parser.currentToken() == XContentParser.Token.VALUE_STRING) { + return parser.floatValue(); + } else { + return Float.NaN; + } + } + + private static BytesReference parseSourceBytes(XContentParser parser) throws IOException { + try (XContentBuilder builder = XContentBuilder.builder(parser.contentType().xContent())) { + // the original document gets slightly modified: whitespaces or + // pretty printing are not preserved, + // it all depends on the current builder settings + builder.copyCurrentStructure(parser); + return builder.bytes(); + } + } + + /** + * we need to declare parse fields for each metadata field, except for _ID, _INDEX and _TYPE which are + * handled individually. 
All other fields are parsed to an entry in the fields map + */ + private static void declareMetaDataFields(ObjectParser, Void> parser) { + for (String metadatafield : MapperService.getAllMetaFields()) { + if (metadatafield.equals(Fields._ID) == false && metadatafield.equals(Fields._INDEX) == false + && metadatafield.equals(Fields._TYPE) == false) { + parser.declareField((map, field) -> { + @SuppressWarnings("unchecked") + Map fieldMap = (Map) map.computeIfAbsent(Fields.FIELDS, + v -> new HashMap()); + fieldMap.put(field.getName(), field); + }, (p, c) -> { + List values = new ArrayList<>(); + values.add(parseStoredFieldsValue(p)); + return new SearchHitField(metadatafield, values); + }, new ParseField(metadatafield), ValueType.VALUE); + } + } + } + + private static Map parseFields(XContentParser parser) throws IOException { + Map fields = new HashMap<>(); + while ((parser.nextToken()) != XContentParser.Token.END_OBJECT) { + String fieldName = parser.currentName(); + ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.nextToken(), parser::getTokenLocation); + List values = new ArrayList<>(); + while ((parser.nextToken()) != XContentParser.Token.END_ARRAY) { + values.add(parseStoredFieldsValue(parser)); + } + fields.put(fieldName, new SearchHitField(fieldName, values)); + } + return fields; + } + + private static Map parseInnerHits(XContentParser parser) throws IOException { + Map innerHits = new HashMap<>(); + while ((parser.nextToken()) != XContentParser.Token.END_OBJECT) { + ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser::getTokenLocation); + innerHits.put(parser.currentName(), SearchHits.fromXContent(parser)); + ensureExpectedToken(XContentParser.Token.END_OBJECT, parser.nextToken(), parser::getTokenLocation); + } + return innerHits; + } + + private static Map parseHighlightFields(XContentParser parser) throws IOException { + Map highlightFields = new HashMap<>(); + while((parser.nextToken()) != 
XContentParser.Token.END_OBJECT) { + HighlightField highlightField = HighlightField.fromXContent(parser); + highlightFields.put(highlightField.getName(), highlightField); + } + return highlightFields; + } + private static Explanation parseExplanation(XContentParser parser) throws IOException { ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser::getTokenLocation); XContentParser.Token token; @@ -629,15 +673,6 @@ public final class SearchHit implements Streamable, ToXContentObject, Iterable new NestedIdentity((String) ctorArgs[0], (int) ctorArgs[1], (NestedIdentity) ctorArgs[2])); static { - PARSER.declareString(constructorArg(), new ParseField(Fields._NESTED_FIELD)); - PARSER.declareInt(constructorArg(), new ParseField(Fields._NESTED_OFFSET)); - PARSER.declareObject(optionalConstructorArg(), PARSER, new ParseField(Fields._NESTED)); + PARSER.declareString(constructorArg(), new ParseField(FIELD)); + PARSER.declareInt(constructorArg(), new ParseField(OFFSET)); + PARSER.declareObject(optionalConstructorArg(), PARSER, new ParseField(_NESTED)); + } + + static NestedIdentity fromXContent(XContentParser parser, Void context) { + return fromXContent(parser); } public static NestedIdentity fromXContent(XContentParser parser) { @@ -896,11 +939,5 @@ public final class SearchHit implements Streamable, ToXContentObject, Iterable> contexts; + private Map> contexts = Collections.emptyMap(); private ScoreDoc doc; private SearchHit hit; + public static final ParseField CONTEXTS = new ParseField("contexts"); + public Option(int docID, Text text, float score, Map> contexts) { super(text, score); this.doc = new ScoreDoc(docID, score); - this.contexts = contexts; + this.contexts = Objects.requireNonNull(contexts, "context map cannot be null"); } protected Option() { @@ -240,14 +249,14 @@ public final class CompletionSuggestion extends Suggest.Suggestion 0) { - builder.startObject("contexts"); + builder.startObject(CONTEXTS.getPreferredName()); for 
(Map.Entry> entry : contexts.entrySet()) { builder.startArray(entry.getKey()); for (CharSequence context : entry.getValue()) { @@ -260,6 +269,58 @@ public final class CompletionSuggestion extends Suggest.Suggestion, Void> PARSER = new ObjectParser<>("CompletionOptionParser", + true, HashMap::new); + + static { + SearchHit.declareInnerHitsParseFields(PARSER); + PARSER.declareString((map, value) -> map.put(Suggestion.Entry.Option.TEXT.getPreferredName(), value), + Suggestion.Entry.Option.TEXT); + PARSER.declareFloat((map, value) -> map.put(Suggestion.Entry.Option.SCORE.getPreferredName(), value), + Suggestion.Entry.Option.SCORE); + PARSER.declareObject((map, value) -> map.put(CompletionSuggestion.Entry.Option.CONTEXTS.getPreferredName(), value), + (p,c) -> parseContexts(p), CompletionSuggestion.Entry.Option.CONTEXTS); + } + + private static Map> parseContexts(XContentParser parser) throws IOException { + Map> contexts = new HashMap<>(); + while((parser.nextToken()) != XContentParser.Token.END_OBJECT) { + ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser::getTokenLocation); + String key = parser.currentName(); + ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.nextToken(), parser::getTokenLocation); + Set values = new HashSet<>(); + while((parser.nextToken()) != XContentParser.Token.END_ARRAY) { + ensureExpectedToken(XContentParser.Token.VALUE_STRING, parser.currentToken(), parser::getTokenLocation); + values.add(parser.text()); + } + contexts.put(key, values); + } + return contexts; + } + + public static Option fromXContent(XContentParser parser) { + Map values = PARSER.apply(parser, null); + + Text text = new Text((String) values.get(Suggestion.Entry.Option.TEXT.getPreferredName())); + Float score = (Float) values.get(Suggestion.Entry.Option.SCORE.getPreferredName()); + @SuppressWarnings("unchecked") + Map> contexts = (Map>) values + .get(CompletionSuggestion.Entry.Option.CONTEXTS.getPreferredName()); + if (contexts == 
null) { + contexts = Collections.emptyMap(); + } + + SearchHit hit = null; + // the option either prints SCORE or inlines the search hit + if (score == null) { + hit = SearchHit.createFromMap(values); + score = hit.getScore(); + } + CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(-1, text, score, contexts); + option.setHit(hit); + return option; + } + @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); @@ -317,7 +378,6 @@ public final class CompletionSuggestion extends Suggest.Suggestion> contexts = new HashMap<>(); + for (int i = 0; i < numberOfContexts; i++) { + int numberOfValues = randomIntBetween(0, 3); + Set values = new HashSet<>(); + for (int v = 0; v < numberOfValues; v++) { + values.add(randomAsciiOfLengthBetween(5, 15)); + } + contexts.put(randomAsciiOfLengthBetween(5, 15), values); + } + SearchHit hit = null; + float score = randomFloat(); + if (randomBoolean()) { + hit = SearchHitTests.createTestItem(false); + score = hit.getScore(); + } + Option option = new CompletionSuggestion.Entry.Option(docId, text, score, contexts); + option.setHit(hit); + return option; + } + + public void testFromXContent() throws IOException { + Option option = createTestItem(); + XContentType xContentType = randomFrom(XContentType.values()); + boolean humanReadable = randomBoolean(); + BytesReference originalBytes = toXContent(option, xContentType, humanReadable); + if (randomBoolean()) { + try (XContentParser parser = createParser(xContentType.xContent(), originalBytes)) { + originalBytes = shuffleXContent(parser, randomBoolean()).bytes(); + } + } + Option parsed; + try (XContentParser parser = createParser(xContentType.xContent(), originalBytes)) { + parsed = Option.fromXContent(parser); + assertNull(parser.nextToken()); + } + assertEquals(option.getText(), parsed.getText()); + assertEquals(option.getHighlighted(), parsed.getHighlighted()); + assertEquals(option.getScore(), parsed.getScore(), 
Float.MIN_VALUE); + assertEquals(option.collateMatch(), parsed.collateMatch()); + assertEquals(option.getContexts(), parsed.getContexts()); + assertToXContentEquivalent(originalBytes, toXContent(parsed, xContentType, humanReadable), xContentType); + } + + public void testToXContent() throws IOException { + Map> contexts = Collections.singletonMap("key", Collections.singleton("value")); + CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(1, new Text("someText"), 1.3f, contexts); + BytesReference xContent = toXContent(option, XContentType.JSON, randomBoolean()); + assertEquals("{\"text\":\"someText\",\"score\":1.3,\"contexts\":{\"key\":[\"value\"]}}" + , xContent.utf8ToString()); + } +}