From ffa5919d7caf829ae7ac9fc0a634c0b50f7e7f59 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Mon, 1 Jul 2019 12:08:50 +0300 Subject: [PATCH] Add support for 'flattened object' fields. (#43762) This commit merges the `object-fields` feature branch. The new 'flattened object' field type allows an entire JSON object to be indexed into a field, and provides limited search functionality over the field's contents. --- docs/reference/mapping/types.asciidoc | 9 +- .../mapping/types/flattened.asciidoc | 188 ++++++ docs/reference/rest-api/info.asciidoc | 4 + .../fielddata/IndexOrdinalsFieldData.java | 7 + .../GlobalOrdinalsIndexFieldData.java | 11 + .../plain/AbstractIndexOrdinalsFieldData.java | 5 + .../SortedSetDVOrdinalsIndexFieldData.java | 5 + .../index/mapper/ContentPath.java | 4 + .../index/mapper/DynamicKeyFieldMapper.java | 54 ++ .../index/mapper/FieldMapper.java | 2 +- .../index/mapper/FieldTypeLookup.java | 128 +++- .../index/mapper/MapperService.java | 5 +- .../index/query/QueryShardContext.java | 3 +- .../index/search/QueryParserHelper.java | 18 +- .../bucket/terms/TermsAggregatorFactory.java | 9 +- .../aggregations/support/ValuesSource.java | 14 + .../fielddata/IndexFieldDataServiceTests.java | 2 +- .../mapper/FieldNamesFieldTypeTests.java | 2 +- .../index/query/MatchQueryBuilderTests.java | 3 +- .../index/search/MatchPhraseQueryIT.java | 5 +- .../bucket/terms/StringTermsIT.java | 7 +- .../search/lookup/LeafDocLookupTests.java | 21 +- .../search/query/QueryStringIT.java | 1 + .../search/query/SimpleQueryStringIT.java | 1 - .../license/XPackLicenseState.java | 9 + .../xpack/core/XPackClientPlugin.java | 2 + .../elasticsearch/xpack/core/XPackField.java | 2 + .../xpack/core/XPackSettings.java | 5 + .../flattened/FlattenedFeatureSetUsage.java | 24 + x-pack/plugin/mapper-flattened/build.gradle | 27 + .../xpack/flattened/FlattenedFeatureSet.java | 54 ++ .../flattened/FlattenedMapperPlugin.java | 47 ++ .../mapper/FlatObjectFieldMapper.java | 616 
++++++++++++++++++ .../mapper/FlatObjectFieldParser.java | 167 +++++ .../KeyedFlatObjectAtomicFieldData.java | 251 +++++++ .../mapper/FlatObjectFieldLookupTests.java | 188 ++++++ .../mapper/FlatObjectFieldMapperTests.java | 457 +++++++++++++ .../mapper/FlatObjectFieldParserTests.java | 318 +++++++++ .../mapper/FlatObjectIndexFieldDataTests.java | 106 +++ .../mapper/FlatObjectSearchTests.java | 512 +++++++++++++++ .../KeyedFlatObjectAtomicFieldDataTests.java | 204 ++++++ .../mapper/KeyedFlatObjectFieldTypeTests.java | 155 +++++ .../mapper/RootFlatObjectFieldTypeTests.java | 116 ++++ .../rest-api-spec/test/flattened/10_basic.yml | 111 ++++ 44 files changed, 3840 insertions(+), 39 deletions(-) create mode 100644 docs/reference/mapping/types/flattened.asciidoc create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/DynamicKeyFieldMapper.java create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/flattened/FlattenedFeatureSetUsage.java create mode 100644 x-pack/plugin/mapper-flattened/build.gradle create mode 100644 x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/FlattenedFeatureSet.java create mode 100644 x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/FlattenedMapperPlugin.java create mode 100644 x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldMapper.java create mode 100644 x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldParser.java create mode 100644 x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/mapper/KeyedFlatObjectAtomicFieldData.java create mode 100644 x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/index/mapper/FlatObjectFieldLookupTests.java create mode 100644 x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldMapperTests.java create mode 100644 
x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldParserTests.java create mode 100644 x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectIndexFieldDataTests.java create mode 100644 x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectSearchTests.java create mode 100644 x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/KeyedFlatObjectAtomicFieldDataTests.java create mode 100644 x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/KeyedFlatObjectFieldTypeTests.java create mode 100644 x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/RootFlatObjectFieldTypeTests.java create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/test/flattened/10_basic.yml diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc index c0db156dc3a..bdfcf1128a0 100644 --- a/docs/reference/mapping/types.asciidoc +++ b/docs/reference/mapping/types.asciidoc @@ -42,8 +42,6 @@ string:: <> and <> <>:: Defines parent/child relation for documents within the same index -<>:: Defines an alias to an existing field. - <>:: Record numeric feature to boost hits at query time. <>:: Record numeric features to boost hits at query time. @@ -54,6 +52,11 @@ string:: <> and <> <>:: A text-like field optimized for queries to implement as-you-type completion +<>:: Defines an alias to an existing field. + +<>:: Allows an entire JSON object to be indexed as a single field. 
+ + [float] === Multi-fields @@ -82,6 +85,8 @@ include::types/date.asciidoc[] include::types/date_nanos.asciidoc[] +include::types/flattened.asciidoc[] + include::types/geo-point.asciidoc[] include::types/geo-shape.asciidoc[] diff --git a/docs/reference/mapping/types/flattened.asciidoc b/docs/reference/mapping/types/flattened.asciidoc new file mode 100644 index 00000000000..80fd72c3dcc --- /dev/null +++ b/docs/reference/mapping/types/flattened.asciidoc @@ -0,0 +1,188 @@ +[role="xpack"] +[testenv="basic"] + +[[flattened]] +=== Flattened datatype + +By default, each subfield in an object is mapped and indexed separately. If +the names or types of the subfields are not known in advance, then they are +<>. + +The `flattened` type provides an alternative approach, where the entire +object is mapped as a single field. Given an object, the `flattened` +mapping will parse out its leaf values and index them into one field as +keywords. The object's contents can then be searched through simple queries +and aggregations. + +This data type can be useful for indexing objects with a large or unknown +number of unique keys. Only one field mapping is created for the whole JSON +object, which can help prevent a <> +from having too many distinct field mappings. + +On the other hand, flattened object fields present a trade-off in terms of +search functionality. Only basic queries are allowed, with no support for +numeric range queries or highlighting. Further information on the limitations +can be found in the <> section. + +NOTE: The `flattened` mapping type should **not** be used for indexing all +document content, as it treats all values as keywords and does not provide full +search functionality. The default approach, where each subfield has its own +entry in the mappings, works well in the majority of cases. 
+ +A flattened object field can be created as follows: +[source,js] +-------------------------------- +PUT bug_reports +{ + "mappings": { + "properties": { + "title": { + "type": "text" + }, + "labels": { + "type": "flattened" + } + } + } +} + +POST bug_reports/_doc/1 +{ + "title": "Results are not sorted correctly.", + "labels": { + "priority": "urgent", + "release": ["v1.2.5", "v1.3.0"], + "timestamp": { + "created": 1541458026, + "closed": 1541457010 + } + } +} +-------------------------------- +// CONSOLE +// TESTSETUP + +During indexing, tokens are created for each leaf value in the JSON object. The +values are indexed as string keywords, without analysis or special handling for +numbers or dates. + +Querying the top-level `flattened` field searches all leaf values in the +object: + +[source,js] +-------------------------------- +POST bug_reports/_search +{ + "query": { + "term": {"labels": "urgent"} + } +} +-------------------------------- +// CONSOLE + +To query on a specific key in the flattened object, object dot notation is used: +[source,js] +-------------------------------- +POST bug_reports/_search +{ + "query": { + "term": {"labels.release": "v1.3.0"} + } +} +-------------------------------- +// CONSOLE + +[[supported-operations]] +==== Supported operations + +Because of the similarities in the way values are indexed, `flattened` +fields share much of the same mapping and search functionality as +<> fields. + +Currently, flattened object fields can be used with the following query types: + +- `term`, `terms`, and `terms_set` +- `prefix` +- `range` +- `match` and `multi_match` +- `query_string` and `simple_query_string` +- `exists` + +When querying, it is not possible to refer to field keys using wildcards, as in +`{ "term": {"labels.time*": 1541457010}}`. Note that all queries, including +`range`, treat the values as string keywords. Highlighting is not supported on +`flattened` fields. 
+ +It is possible to sort on a flattened object field, as well as perform simple +keyword-style aggregations such as `terms`. As with queries, there is no +special support for numerics -- all values in the JSON object are treated as +keywords. When sorting, this implies that values are compared +lexicographically. + +Flattened object fields currently cannot be stored. It is not possible to +specify the <> parameter in the mapping. + +[[flattened-params]] +==== Parameters for flattened object fields + +The following mapping parameters are accepted: + +[horizontal] + +<>:: + + Mapping field-level query time boosting. Accepts a floating point number, + defaults to `1.0`. + +`depth_limit`:: + + The maximum allowed depth of the flattened object field, in terms of nested + inner objects. If a flattened object field exceeds this limit, then an + error will be thrown. Defaults to `20`. + +<>:: + + Should the field be stored on disk in a column-stride fashion, so that it + can later be used for sorting, aggregations, or scripting? Accepts `true` + (default) or `false`. + +<>:: + + Should global ordinals be loaded eagerly on refresh? Accepts `true` or + `false` (default). Enabling this is a good idea on fields that are + frequently used for terms aggregations. + +<>:: + + Leaf values longer than this limit will not be indexed. By default, there + is no limit and all values will be indexed. Note that this limit applies + to the leaf values within the flattened object field, and not the length of + the entire field. + +<>:: + + Determines if the field should be searchable. Accepts `true` (default) or + `false`. + +<>:: + + What information should be stored in the index for scoring purposes. + Defaults to `docs` but can also be set to `freqs` to take term frequency + into account when computing scores. + +<>:: + + A string value which is substituted for any explicit `null` values within + the flattened object field. 
Defaults to `null`, which means null fields are + treated as if they were missing. + +<>:: + + Which scoring algorithm or _similarity_ should be used. Defaults + to `BM25`. + +`split_queries_on_whitespace`:: + + Whether <> should split the input on + whitespace when building a query for this field. Accepts `true` or `false` + (default). diff --git a/docs/reference/rest-api/info.asciidoc b/docs/reference/rest-api/info.asciidoc index 9fc44d12e6f..f6a4c3d4454 100644 --- a/docs/reference/rest-api/info.asciidoc +++ b/docs/reference/rest-api/info.asciidoc @@ -71,6 +71,10 @@ Example response: "available" : true, "enabled" : true }, + "flattened" : { + "available" : true, + "enabled" : true + }, "graph" : { "available" : true, "enabled" : true diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/IndexOrdinalsFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/IndexOrdinalsFieldData.java index 8a9fabc9e13..9d4dbaef211 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/IndexOrdinalsFieldData.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/IndexOrdinalsFieldData.java @@ -47,4 +47,11 @@ public interface IndexOrdinalsFieldData extends IndexFieldData.Global mapperBuilder) { multiFieldsBuilder.add(mapperBuilder); return builder; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldTypeLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldTypeLookup.java index cc2bbd65c4d..f26e8f3a1ee 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldTypeLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldTypeLookup.java @@ -20,11 +20,13 @@ package org.elasticsearch.index.mapper; import org.elasticsearch.common.collect.CopyOnWriteHashMap; +import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.regex.Regex; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.Map; import 
java.util.Objects; import java.util.Set; @@ -36,15 +38,30 @@ class FieldTypeLookup implements Iterable { final CopyOnWriteHashMap fullNameToFieldType; private final CopyOnWriteHashMap aliasToConcreteName; + private final CopyOnWriteHashMap dynamicKeyMappers; + + /** + * The maximum field depth of any mapper that implements {@link DynamicKeyFieldMapper}. + * Allows us stop searching for a 'dynamic key' mapper as soon as we've passed the maximum + * possible field depth. + */ + private final int maxDynamicKeyDepth; + FieldTypeLookup() { fullNameToFieldType = new CopyOnWriteHashMap<>(); aliasToConcreteName = new CopyOnWriteHashMap<>(); + dynamicKeyMappers = new CopyOnWriteHashMap<>(); + maxDynamicKeyDepth = 0; } private FieldTypeLookup(CopyOnWriteHashMap fullNameToFieldType, - CopyOnWriteHashMap aliasToConcreteName) { + CopyOnWriteHashMap aliasToConcreteName, + CopyOnWriteHashMap dynamicKeyMappers, + int maxDynamicKeyDepth) { this.fullNameToFieldType = fullNameToFieldType; this.aliasToConcreteName = aliasToConcreteName; + this.dynamicKeyMappers = dynamicKeyMappers; + this.maxDynamicKeyDepth = maxDynamicKeyDepth; } /** @@ -63,14 +80,21 @@ class FieldTypeLookup implements Iterable { CopyOnWriteHashMap fullName = this.fullNameToFieldType; CopyOnWriteHashMap aliases = this.aliasToConcreteName; + CopyOnWriteHashMap dynamicKeyMappers = this.dynamicKeyMappers; for (FieldMapper fieldMapper : fieldMappers) { + String fieldName = fieldMapper.name(); MappedFieldType fieldType = fieldMapper.fieldType(); MappedFieldType fullNameFieldType = fullName.get(fieldType.name()); if (!Objects.equals(fieldType, fullNameFieldType)) { fullName = fullName.copyAndPut(fieldType.name(), fieldType); } + + if (fieldMapper instanceof DynamicKeyFieldMapper) { + DynamicKeyFieldMapper dynamicKeyMapper = (DynamicKeyFieldMapper) fieldMapper; + dynamicKeyMappers = dynamicKeyMappers.copyAndPut(fieldName, dynamicKeyMapper); + } } for (FieldAliasMapper fieldAliasMapper : fieldAliasMappers) { @@ -79,19 
+103,97 @@ class FieldTypeLookup implements Iterable { aliases = aliases.copyAndPut(aliasName, path); } - return new FieldTypeLookup(fullName, aliases); + int maxDynamicKeyDepth = getMaxDynamicKeyDepth(aliases, dynamicKeyMappers); + + return new FieldTypeLookup(fullName, aliases, dynamicKeyMappers, maxDynamicKeyDepth); } - /** Returns the field for the given field */ + private static int getMaxDynamicKeyDepth(CopyOnWriteHashMap aliases, + CopyOnWriteHashMap dynamicKeyMappers) { + int maxFieldDepth = 0; + for (Map.Entry entry : aliases.entrySet()) { + String aliasName = entry.getKey(); + String path = entry.getValue(); + if (dynamicKeyMappers.containsKey(path)) { + maxFieldDepth = Math.max(maxFieldDepth, fieldDepth(aliasName)); + } + } + + for (String fieldName : dynamicKeyMappers.keySet()) { + if (dynamicKeyMappers.containsKey(fieldName)) { + maxFieldDepth = Math.max(maxFieldDepth, fieldDepth(fieldName)); + } + } + + return maxFieldDepth; + } + + /** + * Computes the total depth of this field by counting the number of parent fields + * in its path. As an example, the field 'parent1.parent2.field' has depth 3. + */ + private static int fieldDepth(String field) { + int numDots = 0; + int dotIndex = -1; + while (true) { + dotIndex = field.indexOf('.', dotIndex + 1); + if (dotIndex < 0) { + break; + } + numDots++; + } + return numDots + 1; + } + + /** + * Returns the mapped field type for the given field name. + */ public MappedFieldType get(String field) { String concreteField = aliasToConcreteName.getOrDefault(field, field); - return fullNameToFieldType.get(concreteField); + MappedFieldType fieldType = fullNameToFieldType.get(concreteField); + if (fieldType != null) { + return fieldType; + } + + // If the mapping contains fields that support dynamic sub-key lookup, check + // if this could correspond to a keyed field of the form 'path_to_field.path_to_key'. + return !dynamicKeyMappers.isEmpty() ? 
getKeyedFieldType(field) : null; + } + + /** + * Check if the given field corresponds to a dynamic lookup mapper of the + * form 'path_to_field.path_to_key'. If so, returns a field type that + * can be used to perform searches on this field. + */ + private MappedFieldType getKeyedFieldType(String field) { + int dotIndex = -1; + int fieldDepth = 0; + + while (true) { + if (++fieldDepth > maxDynamicKeyDepth) { + return null; + } + + dotIndex = field.indexOf('.', dotIndex + 1); + if (dotIndex < 0) { + return null; + } + + String parentField = field.substring(0, dotIndex); + String concreteField = aliasToConcreteName.getOrDefault(parentField, parentField); + DynamicKeyFieldMapper mapper = dynamicKeyMappers.get(concreteField); + + if (mapper != null) { + String key = field.substring(dotIndex + 1); + return mapper.keyedFieldType(key); + } + } } /** * Returns a list of the full names of a simple match regex like pattern against full name and index name. */ - public Collection simpleMatchToFullName(String pattern) { + public Set simpleMatchToFullName(String pattern) { Set fields = new HashSet<>(); for (MappedFieldType fieldType : this) { if (Regex.simpleMatch(pattern, fieldType.name())) { @@ -108,6 +210,20 @@ class FieldTypeLookup implements Iterable { @Override public Iterator iterator() { - return fullNameToFieldType.values().iterator(); + Iterator concreteFieldTypes = fullNameToFieldType.values().iterator(); + + if (dynamicKeyMappers.isEmpty()) { + return concreteFieldTypes; + } else { + Iterator keyedFieldTypes = dynamicKeyMappers.values().stream() + .map(mapper -> mapper.keyedFieldType("")) + .iterator(); + return Iterators.concat(concreteFieldTypes, keyedFieldTypes); + } + } + + // Visible for testing. 
+ int maxKeyedLookupDepth() { + return maxDynamicKeyDepth; } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java index fc7c94372f1..36d8d6d8a64 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java @@ -21,7 +21,6 @@ package org.elasticsearch.index.mapper; import com.carrotsearch.hppc.ObjectHashSet; import com.carrotsearch.hppc.cursors.ObjectCursor; - import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.analysis.Analyzer; @@ -750,10 +749,10 @@ public class MapperService extends AbstractIndexComponent implements Closeable { * Returns all the fields that match the given pattern. If the pattern is prefixed with a type * then the fields will be returned with a type prefix. */ - public Collection simpleMatchToFullName(String pattern) { + public Set simpleMatchToFullName(String pattern) { if (Regex.isSimpleMatchPattern(pattern) == false) { // no wildcards - return Collections.singletonList(pattern); + return Collections.singleton(pattern); } return fieldTypes.simpleMatchToFullName(pattern); } diff --git a/server/src/main/java/org/elasticsearch/index/query/QueryShardContext.java b/server/src/main/java/org/elasticsearch/index/query/QueryShardContext.java index 35430e8ab60..c675aeff332 100644 --- a/server/src/main/java/org/elasticsearch/index/query/QueryShardContext.java +++ b/server/src/main/java/org/elasticsearch/index/query/QueryShardContext.java @@ -65,6 +65,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.BiConsumer; import java.util.function.BiFunction; import java.util.function.Function; @@ -205,7 +206,7 @@ public class QueryShardContext extends QueryRewriteContext { * Returns 
all the fields that match a given pattern. If prefixed with a * type then the fields will be returned with a type prefix. */ - public Collection simpleMatchToIndexNames(String pattern) { + public Set simpleMatchToIndexNames(String pattern) { return mapperService.simpleMatchToFullName(pattern); } diff --git a/server/src/main/java/org/elasticsearch/index/search/QueryParserHelper.java b/server/src/main/java/org/elasticsearch/index/search/QueryParserHelper.java index fafe515ec09..8d6198e17e2 100644 --- a/server/src/main/java/org/elasticsearch/index/search/QueryParserHelper.java +++ b/server/src/main/java/org/elasticsearch/index/search/QueryParserHelper.java @@ -30,6 +30,7 @@ import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; /** * Helpers to extract and expand field names and boosts @@ -130,8 +131,9 @@ public final class QueryParserHelper { */ public static Map resolveMappingField(QueryShardContext context, String fieldOrPattern, float weight, boolean acceptAllTypes, boolean acceptMetadataField, String fieldSuffix) { - Collection allFields = context.simpleMatchToIndexNames(fieldOrPattern); + Set allFields = context.simpleMatchToIndexNames(fieldOrPattern); Map fields = new HashMap<>(); + for (String fieldName : allFields) { if (fieldSuffix != null && context.fieldMapper(fieldName + fieldSuffix) != null) { fieldName = fieldName + fieldSuffix; @@ -159,13 +161,17 @@ public final class QueryParserHelper { // other exceptions are parsing errors or not indexed fields: keep } } - // handle duplicates - float w = weight; - if (fields.containsKey(fieldType.name())) { - w *= fields.get(fieldType.name()); + + // Deduplicate aliases and their concrete fields. 
+ String resolvedFieldName = fieldType.name(); + if (allFields.contains(resolvedFieldName)) { + fieldName = resolvedFieldName; } - fields.put(fieldType.name(), w); + + float w = fields.getOrDefault(fieldName, 1.0F); + fields.put(fieldName, w * weight); } + checkForTooManyFields(fields, context); return fields; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 877a8e59bc2..a3247a3c630 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -262,21 +262,26 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory 0L, null); diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java index 0fc8e0f1de6..062e38e57c0 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java @@ -151,7 +151,8 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase indexRequests = getIndexRequests(); indexRandom(true, false, indexRequests); - MatchPhraseQueryBuilder baseQuery = QueryBuilders.matchPhraseQuery("name", "the who") + MatchPhraseQueryBuilder baseQuery = matchPhraseQuery("name", "the who") .analyzer("standard_stopwords"); MatchPhraseQueryBuilder matchNoneQuery = baseQuery.zeroTermsQuery(ZeroTermsQuery.NONE); @@ -67,7 +67,6 @@ public class MatchPhraseQueryIT extends ESIntegTestCase { assertHitCount(matchAllResponse, 2L); } - private List getIndexRequests() { List requests = new ArrayList<>(); requests.add(client().prepareIndex(INDEX, "band").setSource("name", "the beatles")); diff --git 
a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java index 11eed6f90e7..102d4e0c2ad 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java @@ -42,7 +42,6 @@ import org.elasticsearch.search.aggregations.metrics.ExtendedStats; import org.elasticsearch.search.aggregations.metrics.Stats; import org.elasticsearch.search.aggregations.metrics.Sum; import org.elasticsearch.test.ESIntegTestCase; -import org.hamcrest.Matchers; import org.junit.After; import org.junit.Before; @@ -71,6 +70,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcke import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.startsWith; import static org.hamcrest.core.IsNull.notNullValue; @ESIntegTestCase.SuiteScopeTestCase @@ -138,7 +138,8 @@ public class StringTermsIT extends AbstractTermsTestCase { .startArray(MULTI_VALUED_FIELD_NAME) .value("val" + i) .value("val" + (i + 1)) - .endArray().endObject())); + .endArray() + .endObject())); } getMultiSortDocs(builders); @@ -574,7 +575,7 @@ public class StringTermsIT extends AbstractTermsTestCase { ElasticsearchException rootCause = rootCauses[0]; if (rootCause instanceof AggregationExecutionException) { AggregationExecutionException aggException = (AggregationExecutionException) rootCause; - assertThat(aggException.getMessage(), Matchers.startsWith("Invalid aggregation order path")); + assertThat(aggException.getMessage(), startsWith("Invalid aggregation order path")); } else { throw e; } diff --git a/server/src/test/java/org/elasticsearch/search/lookup/LeafDocLookupTests.java 
b/server/src/test/java/org/elasticsearch/search/lookup/LeafDocLookupTests.java index fca61bf2564..67420cf5eaa 100644 --- a/server/src/test/java/org/elasticsearch/search/lookup/LeafDocLookupTests.java +++ b/server/src/test/java/org/elasticsearch/search/lookup/LeafDocLookupTests.java @@ -51,13 +51,7 @@ public class LeafDocLookupTests extends ESTestCase { when(mapperService.fullName("alias")).thenReturn(fieldType); docValues = mock(ScriptDocValues.class); - - AtomicFieldData atomicFieldData = mock(AtomicFieldData.class); - doReturn(docValues).when(atomicFieldData).getScriptValues(); - - IndexFieldData fieldData = mock(IndexFieldData.class); - when(fieldData.getFieldName()).thenReturn("field"); - doReturn(atomicFieldData).when(fieldData).load(anyObject()); + IndexFieldData fieldData = createFieldData(docValues); docLookup = new LeafDocLookup(mapperService, ignored -> fieldData, @@ -70,7 +64,7 @@ public class LeafDocLookupTests extends ESTestCase { assertEquals(docValues, fetchedDocValues); } - public void testLookupWithFieldAlias() { + public void testFieldAliases() { ScriptDocValues fetchedDocValues = docLookup.get("alias"); assertEquals(docValues, fetchedDocValues); } @@ -80,4 +74,15 @@ public class LeafDocLookupTests extends ESTestCase { assertEquals(docValues, fetchedDocValues); assertWarnings(TYPES_DEPRECATION_MESSAGE); } + + private IndexFieldData createFieldData(ScriptDocValues scriptDocValues) { + AtomicFieldData atomicFieldData = mock(AtomicFieldData.class); + doReturn(scriptDocValues).when(atomicFieldData).getScriptValues(); + + IndexFieldData fieldData = mock(IndexFieldData.class); + when(fieldData.getFieldName()).thenReturn("field"); + doReturn(atomicFieldData).when(fieldData).load(anyObject()); + + return fieldData; + } } diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryStringIT.java b/server/src/test/java/org/elasticsearch/search/query/QueryStringIT.java index 8d5933e6cfc..0498cc63fb9 100644 --- 
a/server/src/test/java/org/elasticsearch/search/query/QueryStringIT.java +++ b/server/src/test/java/org/elasticsearch/search/query/QueryStringIT.java @@ -359,6 +359,7 @@ public class QueryStringIT extends ESIntegTestCase { assertHits(response.getHits(), "1"); } + private void assertHits(SearchHits hits, String... ids) { assertThat(hits.getTotalHits().value, equalTo((long) ids.length)); Set hitIds = new HashSet<>(); diff --git a/server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java b/server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java index 7f8ab4aa515..2f24b3dec24 100644 --- a/server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java +++ b/server/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java @@ -642,7 +642,6 @@ public class SimpleQueryStringIT extends ESIntegTestCase { assertHits(response.getHits(), "2", "3"); } - public void testFieldAliasOnDisallowedFieldType() throws Exception { String indexBody = copyToStringFromClasspath("/org/elasticsearch/search/query/all-query-index.json"); assertAcked(prepareCreate("test").setSource(indexBody, XContentType.JSON)); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/XPackLicenseState.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/XPackLicenseState.java index 99c1e2e9171..859fc074e17 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/XPackLicenseState.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/XPackLicenseState.java @@ -696,6 +696,15 @@ public class XPackLicenseState { return licensed && localStatus.active; } + /** + * Determine if support for flattened object fields should be enabled. + *

+ * Flattened fields are available for all license types except {@link OperationMode#MISSING}. + */ + public synchronized boolean isFlattenedAllowed() { + return status.active; + } + /** * Determine if Vectors support should be enabled. *

diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index 9fa7e328426..138f8cac48d 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -57,6 +57,7 @@ import org.elasticsearch.xpack.core.dataframe.transforms.DataFrameTransformState import org.elasticsearch.xpack.core.dataframe.transforms.SyncConfig; import org.elasticsearch.xpack.core.dataframe.transforms.TimeSyncConfig; import org.elasticsearch.xpack.core.deprecation.DeprecationInfoAction; +import org.elasticsearch.xpack.core.flattened.FlattenedFeatureSetUsage; import org.elasticsearch.xpack.core.graph.GraphFeatureSetUsage; import org.elasticsearch.xpack.core.graph.action.GraphExploreAction; import org.elasticsearch.xpack.core.indexlifecycle.AllocateAction; @@ -516,6 +517,7 @@ public class XPackClientPlugin extends Plugin implements ActionPlugin, NetworkPl new NamedWriteableRegistry.Entry(Task.Status.class, DataFrameField.TASK_NAME, DataFrameTransformState::new), new NamedWriteableRegistry.Entry(PersistentTaskState.class, DataFrameField.TASK_NAME, DataFrameTransformState::new), new NamedWriteableRegistry.Entry(SyncConfig.class, DataFrameField.TIME_BASED_SYNC.getPreferredName(), TimeSyncConfig::new), + new NamedWriteableRegistry.Entry(XPackFeatureSet.Usage.class, XPackField.FLATTENED, FlattenedFeatureSetUsage::new), // Vectors new NamedWriteableRegistry.Entry(XPackFeatureSet.Usage.class, XPackField.VECTORS, VectorsFeatureSetUsage::new), // Voting Only Node diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackField.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackField.java index 54e0f58ae28..351606e3218 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackField.java +++ 
b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackField.java @@ -37,6 +37,8 @@ public final class XPackField { public static final String CCR = "ccr"; /** Name constant for the data frame feature. */ public static final String DATA_FRAME = "data_frame"; + /** Name constant for flattened fields. */ + public static final String FLATTENED = "flattened"; /** Name constant for the vectors feature. */ public static final String VECTORS = "vectors"; /** Name constant for the voting-only-node feature. */ diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackSettings.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackSettings.java index 81ac6a42020..ca4c1742e85 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackSettings.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackSettings.java @@ -119,6 +119,10 @@ public class XPackSettings { /** Setting for enabling or disabling sql. Defaults to true. */ public static final Setting SQL_ENABLED = Setting.boolSetting("xpack.sql.enabled", true, Setting.Property.NodeScope); + /** Setting for enabling or disabling flattened fields. Defaults to true. */ + public static final Setting FLATTENED_ENABLED = Setting.boolSetting("xpack.flattened.enabled", + true, Setting.Property.NodeScope); + /** Setting for enabling or disabling vectors. Defaults to true. 
*/ public static final Setting VECTORS_ENABLED = Setting.boolSetting("xpack.vectors.enabled", true, Setting.Property.NodeScope); @@ -252,6 +256,7 @@ public class XPackSettings { settings.add(PASSWORD_HASHING_ALGORITHM); settings.add(INDEX_LIFECYCLE_ENABLED); settings.add(DATA_FRAME_ENABLED); + settings.add(FLATTENED_ENABLED); settings.add(VECTORS_ENABLED); return Collections.unmodifiableList(settings); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/flattened/FlattenedFeatureSetUsage.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/flattened/FlattenedFeatureSetUsage.java new file mode 100644 index 00000000000..fe1a9bdadfd --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/flattened/FlattenedFeatureSetUsage.java @@ -0,0 +1,24 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.core.flattened; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.xpack.core.XPackFeatureSet; +import org.elasticsearch.xpack.core.XPackField; + +import java.io.IOException; + +/** Usage report for the flattened field feature, identified by {@link XPackField#FLATTENED}. */ public class FlattenedFeatureSetUsage extends XPackFeatureSet.Usage { + + /** Deserializing constructor; reading from the stream is delegated to the superclass. */ public FlattenedFeatureSetUsage(StreamInput input) throws IOException { + super(input); + } + + /** Creates a usage report with the given availability and enabled flags. */ public FlattenedFeatureSetUsage(boolean available, boolean enabled) { + super(XPackField.FLATTENED, available, enabled); + } +} diff --git a/x-pack/plugin/mapper-flattened/build.gradle b/x-pack/plugin/mapper-flattened/build.gradle new file mode 100644 index 00000000000..c9e1c408304 --- /dev/null +++ b/x-pack/plugin/mapper-flattened/build.gradle @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements.
Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +evaluationDependsOn(xpackModule('core')) + +apply plugin: 'elasticsearch.esplugin' + +esplugin { + name 'flattened' + description 'Module for the flattened field type, which allows JSON objects to be flattened into a single field.' + classname 'org.elasticsearch.xpack.flattened.FlattenedMapperPlugin' + extendedPlugins = ['x-pack-core'] +} +archivesBaseName = 'x-pack-flattened' + +dependencies { + compileOnly project(path: xpackModule('core'), configuration: 'default') + testCompile project(path: xpackModule('core'), configuration: 'testArtifacts') + if (isEclipse) { + testCompile project(path: xpackModule('core-tests'), configuration: 'testArtifacts') + } +} + +integTest.enabled = false diff --git a/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/FlattenedFeatureSet.java b/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/FlattenedFeatureSet.java new file mode 100644 index 00000000000..5ebfa73c86c --- /dev/null +++ b/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/FlattenedFeatureSet.java @@ -0,0 +1,54 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.flattened; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.xpack.core.XPackFeatureSet; +import org.elasticsearch.xpack.core.XPackField; +import org.elasticsearch.xpack.core.XPackSettings; +import org.elasticsearch.xpack.core.flattened.FlattenedFeatureSetUsage; + +import java.util.Map; + +/** Feature set for flattened fields: reports the feature name, whether the license allows it, and whether it is enabled through {@link XPackSettings#FLATTENED_ENABLED}. */ public class FlattenedFeatureSet implements XPackFeatureSet { + + private final boolean enabled; + private final XPackLicenseState licenseState; + + /** Reads the enabled flag from the given settings and keeps the license state for later availability checks. */ @Inject + public FlattenedFeatureSet(Settings settings, XPackLicenseState licenseState) { + this.enabled = XPackSettings.FLATTENED_ENABLED.get(settings); + this.licenseState = licenseState; + } + + @Override + public String name() { + return XPackField.FLATTENED; + } + + /** A missing (null) license state is reported as unavailable. */ @Override + public boolean available() { + return licenseState != null && licenseState.isFlattenedAllowed(); + } + + @Override + public boolean enabled() { + return enabled; + } + + /** This feature reports no native code information. */ @Override + public Map nativeCodeInfo() { + return null; + } + + /** Responds on the calling thread with the current available/enabled flags. */ @Override + public void usage(ActionListener listener) { + listener.onResponse(new FlattenedFeatureSetUsage(available(), enabled())); + } +} diff --git a/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/FlattenedMapperPlugin.java b/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/FlattenedMapperPlugin.java new file mode 100644 index 00000000000..1854f1b6f91 --- /dev/null +++ b/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/FlattenedMapperPlugin.java @@ -0,0 +1,47 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License.
+ */ + +package org.elasticsearch.xpack.flattened; + +import org.elasticsearch.common.inject.Module; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.mapper.Mapper; +import org.elasticsearch.plugins.ActionPlugin; +import org.elasticsearch.plugins.MapperPlugin; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.xpack.core.XPackPlugin; +import org.elasticsearch.xpack.core.XPackSettings; +import org.elasticsearch.xpack.flattened.mapper.FlatObjectFieldMapper; + +import java.util.Collection; +import java.util.Collections; +import java.util.Map; + +import static java.util.Collections.emptyMap; +import static java.util.Collections.singletonMap; + +/** Plugin that registers the flattened field mapper and binds {@link FlattenedFeatureSet}. */ public class FlattenedMapperPlugin extends Plugin implements MapperPlugin, ActionPlugin { + + private final boolean enabled; + + /** Reads {@link XPackSettings#FLATTENED_ENABLED} from the given settings. */ public FlattenedMapperPlugin(Settings settings) { + this.enabled = XPackSettings.FLATTENED_ENABLED.get(settings); + } + + /** Binds FlattenedFeatureSet as an X-Pack feature set; this happens regardless of the enabled flag. NOTE(review): presumably overrides Plugin#createGuiceModules - confirm and consider adding @Override. */ public Collection createGuiceModules() { + return Collections.singletonList(b -> { + XPackPlugin.bindFeatureSet(b, FlattenedFeatureSet.class); + }); + } + + /** Registers the flattened type parser under its content type, or returns an empty map when the feature is disabled. */ @Override + public Map getMappers() { + if (enabled == false) { + return emptyMap(); + } + return singletonMap(FlatObjectFieldMapper.CONTENT_TYPE, new FlatObjectFieldMapper.TypeParser()); + } +} diff --git a/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldMapper.java b/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldMapper.java new file mode 100644 index 00000000000..eef76f0807b --- /dev/null +++ b/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldMapper.java @@ -0,0 +1,616 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License.
+ */ + +package org.elasticsearch.xpack.flattened.mapper; + +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.OrdinalMap; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AnalyzerScope; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexFieldDataCache; +import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData; +import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource; +import org.elasticsearch.index.fielddata.plain.AbstractAtomicOrdinalsFieldData; +import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData; +import org.elasticsearch.index.fielddata.plain.SortedSetDVOrdinalsIndexFieldData; +import org.elasticsearch.index.mapper.DynamicKeyFieldMapper; +import org.elasticsearch.index.mapper.FieldMapper; +import 
org.elasticsearch.index.mapper.FieldNamesFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.Mapper; +import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.mapper.StringFieldType; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.indices.breaker.CircuitBreakerService; +import org.elasticsearch.search.MultiValueMode; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.index.mapper.TypeParsers.parseField; + +/** + * A field mapper that accepts a JSON object and flattens it into a single field. This data type + * can be a useful alternative to an 'object' mapping when the object has a large, unknown set + * of keys. + * + * Currently the mapper extracts all leaf values of the JSON object, converts them to their text + * representations, and indexes each one as a keyword. It creates both a 'keyed' version of the token + * to allow searches on particular key-value pairs, as well as a 'root' token without the key. + * + * As an example, given a flat object field called 'flat_object' and the following input: + * + * { + * "flat_object": { + * "key1": "some value", + * "key2": { + * "key3": true + * } + * } + * } + * + * the mapper will produce untokenized string fields with the name "flat_object" and values + * "some value" and "true", as well as string fields called "flat_object._keyed" with values + * "key1\0some value" and "key2.key3\0true". Note that \0 is used as a reserved separator + * character (see {@link FlatObjectFieldParser#SEPARATOR}).
+ */ +public final class FlatObjectFieldMapper extends DynamicKeyFieldMapper { + + public static final String CONTENT_TYPE = "flattened"; + /* Suffix appended to the root field name to form the name of the keyed field. */ private static final String KEYED_FIELD_SUFFIX = "._keyed"; + + /** Default field type configuration and default parameter values. */ private static class Defaults { + public static final MappedFieldType FIELD_TYPE = new RootFlatObjectFieldType(); + + static { + FIELD_TYPE.setTokenized(false); + FIELD_TYPE.setStored(false); + FIELD_TYPE.setHasDocValues(true); + FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); + FIELD_TYPE.setOmitNorms(true); + FIELD_TYPE.freeze(); + } + + public static final int DEPTH_LIMIT = 20; + public static final int IGNORE_ABOVE = Integer.MAX_VALUE; + } + + /** Builder for the mapper. Multi-fields, copy_to and store are rejected with UnsupportedOperationException. */ public static class Builder extends FieldMapper.Builder { + private int depthLimit = Defaults.DEPTH_LIMIT; + private int ignoreAbove = Defaults.IGNORE_ABOVE; + + public Builder(String name) { + super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE); + builder = this; + } + + @Override + public RootFlatObjectFieldType fieldType() { + return (RootFlatObjectFieldType) super.fieldType(); + } + + /** Rejects any index option that orders after DOCS_AND_FREQS. */ @Override + public Builder indexOptions(IndexOptions indexOptions) { + if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) > 0) { + throw new IllegalArgumentException("The [" + CONTENT_TYPE + + "] field does not support positions, got [index_options]=" + + indexOptionToString(indexOptions)); + } + return super.indexOptions(indexOptions); + } + + /** Sets the depth limit. Only negative values are rejected, so 0 passes this check even though the message says "positive". */ public Builder depthLimit(int depthLimit) { + if (depthLimit < 0) { + throw new IllegalArgumentException("[depth_limit] must be positive, got " + depthLimit); + } + this.depthLimit = depthLimit; + return this; + } + + public Builder eagerGlobalOrdinals(boolean eagerGlobalOrdinals) { + fieldType().setEagerGlobalOrdinals(eagerGlobalOrdinals); + return builder; + } + + /** Sets ignore_above. Only negative values are rejected, so 0 passes this check even though the message says "positive". */ public Builder ignoreAbove(int ignoreAbove) { + if (ignoreAbove < 0) { + throw new IllegalArgumentException("[ignore_above] must be positive, got " + ignoreAbove); + } + this.ignoreAbove = ignoreAbove; + return this; + } + +
public Builder splitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) { + fieldType().setSplitQueriesOnWhitespace(splitQueriesOnWhitespace); + return builder; + } + + @Override + public Builder addMultiField(Mapper.Builder mapperBuilder) { + throw new UnsupportedOperationException("[fields] is not supported for [" + CONTENT_TYPE + "] fields."); + } + + @Override + public Builder copyTo(CopyTo copyTo) { + throw new UnsupportedOperationException("[copy_to] is not supported for [" + CONTENT_TYPE + "] fields."); + } + + @Override + public Builder store(boolean store) { + throw new UnsupportedOperationException("[store] is not supported for [" + CONTENT_TYPE + "] fields."); + } + + /** Builds the mapper. When split_queries_on_whitespace is set, the search analyzer is replaced by a whitespace analyzer. */ @Override + public FlatObjectFieldMapper build(BuilderContext context) { + setupFieldType(context); + if (fieldType().splitQueriesOnWhitespace()) { + NamedAnalyzer whitespaceAnalyzer = new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer()); + fieldType().setSearchAnalyzer(whitespaceAnalyzer); + } + return new FlatObjectFieldMapper(name, fieldType, defaultFieldType, + ignoreAbove, depthLimit, context.indexSettings()); + } + } + + /** Parses the mapping definition; consumes (and removes from the node) depth_limit, eager_global_ordinals, ignore_above, null_value and split_queries_on_whitespace. */ public static class TypeParser implements Mapper.TypeParser { + @Override + public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { + Builder builder = new Builder(name); + parseField(builder, name, node, parserContext); + /* NOTE(review): generic type arguments seem to be missing in this copy of the patch (e.g. the "Iterator>" below); compare with the upstream file before applying. */ for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { + Map.Entry entry = iterator.next(); + String propName = entry.getKey(); + Object propNode = entry.getValue(); + if (propName.equals("depth_limit")) { + builder.depthLimit(XContentMapValues.nodeIntegerValue(propNode, -1)); + iterator.remove(); + } else if (propName.equals("eager_global_ordinals")) { + builder.eagerGlobalOrdinals(XContentMapValues.nodeBooleanValue(propNode, "eager_global_ordinals")); + iterator.remove(); + } else if (propName.equals("ignore_above")) { +
builder.ignoreAbove(XContentMapValues.nodeIntegerValue(propNode, -1)); + iterator.remove(); + } else if (propName.equals("null_value")) { + if (propNode == null) { + throw new MapperParsingException("Property [null_value] cannot be null."); + } + builder.nullValue(propNode.toString()); + iterator.remove(); + } else if (propName.equals("split_queries_on_whitespace")) { + builder.splitQueriesOnWhitespace + (XContentMapValues.nodeBooleanValue(propNode, "split_queries_on_whitespace")); + iterator.remove(); + } + } + return builder; + } + } + + /** + * A field type that represents the values under a particular JSON key, used + * when searching under a specific key as in 'my_flat_object.key: some_value'. + */ + public static final class KeyedFlatObjectFieldType extends StringFieldType { + private final String key; + private boolean splitQueriesOnWhitespace; + + public KeyedFlatObjectFieldType(String key) { + setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); + setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + this.key = key; + } + + public KeyedFlatObjectFieldType clone() { + return new KeyedFlatObjectFieldType(this); + } + + private KeyedFlatObjectFieldType(KeyedFlatObjectFieldType ref) { + super(ref); + this.key = ref.key; + this.splitQueriesOnWhitespace = ref.splitQueriesOnWhitespace; + } + + /** Creates a keyed type that copies the root type's settings, under the given field name and key. */ private KeyedFlatObjectFieldType(String name, String key, RootFlatObjectFieldType ref) { + super(ref); + setName(name); + this.key = key; + this.splitQueriesOnWhitespace = ref.splitQueriesOnWhitespace; + } + + /** On top of the superclass comparison, only splitQueriesOnWhitespace is compared; 'key' is not compared here. */ @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + KeyedFlatObjectFieldType that = (KeyedFlatObjectFieldType) o; + return splitQueriesOnWhitespace == that.splitQueriesOnWhitespace; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), splitQueriesOnWhitespace); + } + + @Override + public String typeName() { + return CONTENT_TYPE; +
} + + public String key() { + return key; + } + + public boolean splitQueriesOnWhitespace() { + return splitQueriesOnWhitespace; + } + + public void setSplitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) { + checkIfFrozen(); + this.splitQueriesOnWhitespace = splitQueriesOnWhitespace; + } + + /** Prefix query on the key followed by the separator, i.e. it matches any term stored under this key. */ @Override + public Query existsQuery(QueryShardContext context) { + Term term = new Term(name(), FlatObjectFieldParser.createKeyedValue(key, "")); + return new PrefixQuery(term); + } + + @Override + public Query rangeQuery(Object lowerTerm, + Object upperTerm, + boolean includeLower, + boolean includeUpper, + QueryShardContext context) { + + // We require range queries to specify both bounds because an unbounded query could incorrectly match + // values from other keys. For example, a query on the 'first' key with only a lower bound would become + // ("first\0value", null), which would also match the value "second\0value" belonging to the key 'second'. + if (lowerTerm == null || upperTerm == null) { + throw new IllegalArgumentException("[range] queries on keyed [" + CONTENT_TYPE + + "] fields must include both an upper and a lower bound."); + } + + return super.rangeQuery(lowerTerm, upperTerm, + includeLower, includeUpper, context); + } + + @Override + public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions, + boolean transpositions) { + throw new UnsupportedOperationException("[fuzzy] queries are not currently supported on keyed " + + "[" + CONTENT_TYPE + "] fields."); + } + + @Override + public Query regexpQuery(String value, int flags, int maxDeterminizedStates, + MultiTermQuery.RewriteMethod method, QueryShardContext context) { + throw new UnsupportedOperationException("[regexp] queries are not currently supported on keyed " + + "[" + CONTENT_TYPE + "] fields."); + } + + @Override + public Query wildcardQuery(String value, + MultiTermQuery.RewriteMethod method, + QueryShardContext context) { + throw new
UnsupportedOperationException("[wildcard] queries are not currently supported on keyed " + + "[" + CONTENT_TYPE + "] fields."); + } + + /** Prefixes the search value with this type's key and the separator so that it has the same form as the indexed keyed terms. */ @Override + public BytesRef indexedValueForSearch(Object value) { + if (value == null) { + return null; + } + + String stringValue = value instanceof BytesRef + ? ((BytesRef) value).utf8ToString() + : value.toString(); + String keyedValue = FlatObjectFieldParser.createKeyedValue(key, stringValue); + return new BytesRef(keyedValue); + } + + @Override + public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { + failIfNoDocValues(); + return new KeyedFlatObjectFieldData.Builder(key); + } + } + + /** + * A field data implementation that gives access to the values associated with + * a particular JSON key. + * + * This class wraps the field data that is built directly on the keyed flat object field, + * and filters out values whose prefix doesn't match the requested key. Loading and caching + * is fully delegated to the wrapped field data, so that different {@link KeyedFlatObjectFieldData} + * for the same flat object field share the same global ordinals. + * + * Because of the code-level complexity it would introduce, it is currently not possible + * to retrieve the underlying global ordinals map through {@link #getOrdinalMap()}.
+ */ + public static class KeyedFlatObjectFieldData implements IndexOrdinalsFieldData { + private final String key; + private final IndexOrdinalsFieldData delegate; + + private KeyedFlatObjectFieldData(String key, IndexOrdinalsFieldData delegate) { + this.delegate = delegate; + this.key = key; + } + + public String getKey() { + return key; + } + + @Override + public String getFieldName() { + return delegate.getFieldName(); + } + + /** The comparator source is built on this wrapper rather than on the delegate. */ @Override + public SortField sortField(Object missingValue, + MultiValueMode sortMode, + XFieldComparatorSource.Nested nested, + boolean reverse) { + XFieldComparatorSource source = new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested); + return new SortField(getFieldName(), source, reverse); + } + + @Override + public void clear() { + delegate.clear(); + } + + /** Loads through the delegate and wraps the result in a key-specific view. */ @Override + public AtomicOrdinalsFieldData load(LeafReaderContext context) { + AtomicOrdinalsFieldData fieldData = delegate.load(context); + return new KeyedFlatObjectAtomicFieldData(key, fieldData); + } + + @Override + public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception { + AtomicOrdinalsFieldData fieldData = delegate.loadDirect(context); + return new KeyedFlatObjectAtomicFieldData(key, fieldData); + } + + /** Loads global field data through the delegate and re-wraps it for the same key. */ @Override + public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) { + IndexOrdinalsFieldData fieldData = delegate.loadGlobal(indexReader); + return new KeyedFlatObjectFieldData(key, fieldData); + } + + @Override + public IndexOrdinalsFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception { + IndexOrdinalsFieldData fieldData = delegate.localGlobalDirect(indexReader); + return new KeyedFlatObjectFieldData(key, fieldData); + } + + @Override + public OrdinalMap getOrdinalMap() { + throw new UnsupportedOperationException("The field data for the flat object field [" + + delegate.getFieldName() + "] does not allow access to the underlying ordinal map."); + } + + @Override + public boolean
supportsGlobalOrdinalsMapping() { + return false; + } + + @Override + public Index index() { + return delegate.index(); + } + + /** Builds a KeyedFlatObjectFieldData for one key on top of sorted-set doc-values field data for the field. */ public static class Builder implements IndexFieldData.Builder { + private final String key; + + Builder(String key) { + this.key = key; + } + + @Override + public IndexFieldData build(IndexSettings indexSettings, + MappedFieldType fieldType, + IndexFieldDataCache cache, + CircuitBreakerService breakerService, + MapperService mapperService) { + String fieldName = fieldType.name(); + IndexOrdinalsFieldData delegate = new SortedSetDVOrdinalsIndexFieldData(indexSettings, + cache, fieldName, breakerService, AbstractAtomicOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION); + return new KeyedFlatObjectFieldData(key, delegate); + } + } + } + + /** + * A field type that represents all 'root' values. This field type is used in + * searches on the flat object field itself, e.g. 'my_flat_object: some_value'. + */ + public static final class RootFlatObjectFieldType extends StringFieldType { + private boolean splitQueriesOnWhitespace; + + public RootFlatObjectFieldType() { + setIndexAnalyzer(Lucene.KEYWORD_ANALYZER); + setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + } + + private RootFlatObjectFieldType(RootFlatObjectFieldType ref) { + super(ref); + this.splitQueriesOnWhitespace = ref.splitQueriesOnWhitespace; + } + + public RootFlatObjectFieldType clone() { + return new RootFlatObjectFieldType(this); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + RootFlatObjectFieldType that = (RootFlatObjectFieldType) o; + return splitQueriesOnWhitespace == that.splitQueriesOnWhitespace; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), splitQueriesOnWhitespace); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + public boolean splitQueriesOnWhitespace() { + return
splitQueriesOnWhitespace; + } + + public void setSplitQueriesOnWhitespace(boolean splitQueriesOnWhitespace) { + checkIfFrozen(); + this.splitQueriesOnWhitespace = splitQueriesOnWhitespace; + } + + /** Converts the value to a string; a non-null value is cast to BytesRef. */ @Override + public Object valueForDisplay(Object value) { + if (value == null) { + return null; + } + BytesRef binaryValue = (BytesRef) value; + return binaryValue.utf8ToString(); + } + + /** Uses a doc-values exists query when doc values are enabled; otherwise a term query on FieldNamesFieldMapper.NAME. */ @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } + + @Override + public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { + failIfNoDocValues(); + return new DocValuesIndexFieldData.Builder(); + } + } + + private final FlatObjectFieldParser fieldParser; + private int depthLimit; + private int ignoreAbove; + + /** Creates the mapper and the parser that will be used for documents; the parser receives depthLimit and ignoreAbove as given here. */ private FlatObjectFieldMapper(String simpleName, + MappedFieldType fieldType, + MappedFieldType defaultFieldType, + int ignoreAbove, + int depthLimit, + Settings indexSettings) { + super(simpleName, fieldType, defaultFieldType, indexSettings, CopyTo.empty()); + assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0; + + this.depthLimit = depthLimit; + this.ignoreAbove = ignoreAbove; + this.fieldParser = new FlatObjectFieldParser(fieldType.name(), keyedFieldName(), + fieldType, depthLimit, ignoreAbove); + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + /** Only the ignoreAbove field is copied from the merged mapper. The final fieldParser built in the constructor is not recreated here and depthLimit is left unchanged. NOTE(review): confirm that an updated ignore_above actually reaches the parser. */ @Override + protected void doMerge(Mapper mergeWith) { + super.doMerge(mergeWith); + this.ignoreAbove = ((FlatObjectFieldMapper) mergeWith).ignoreAbove; + } + + @Override + protected FlatObjectFieldMapper clone() { + return (FlatObjectFieldMapper) super.clone(); + } + + @Override + public RootFlatObjectFieldType fieldType() { + return (RootFlatObjectFieldType) super.fieldType(); + } + + /** Returns a new keyed field type for the given key, derived from the root field type. */ @Override + public KeyedFlatObjectFieldType keyedFieldType(String key) { + return new
KeyedFlatObjectFieldType(keyedFieldName(), key, fieldType()); + } + + /** Name of the keyed field: the root field name plus KEYED_FIELD_SUFFIX. */ public String keyedFieldName() { + return fieldType.name() + KEYED_FIELD_SUFFIX; + } + + /** Parses the current object into indexable fields; see the inline notes for the cases that produce nothing. */ @Override + protected void parseCreateField(ParseContext context, List fields) throws IOException { + /* An explicit null for the whole field produces no fields. */ if (context.parser().currentToken() == XContentParser.Token.VALUE_NULL) { + return; + } + + /* Neither indexed nor stored as doc values: skip over the object without parsing it. */ if (fieldType.indexOptions() == IndexOptions.NONE && !fieldType.hasDocValues()) { + context.parser().skipChildren(); + return; + } + + XContentParser xContentParser = context.parser(); + fields.addAll(fieldParser.parse(xContentParser)); + + /* Without doc values, RootFlatObjectFieldType#existsQuery falls back to FieldNamesFieldMapper.NAME, so a field-names entry is added. */ if (!fieldType.hasDocValues()) { + createFieldNamesField(context, fields); + } + } + + /** Writes depth_limit, ignore_above, null_value and split_queries_on_whitespace when includeDefaults is set or the value is non-default. */ @Override + protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { + super.doXContentBody(builder, includeDefaults, params); + + if (includeDefaults || depthLimit != Defaults.DEPTH_LIMIT) { + builder.field("depth_limit", depthLimit); + } + + if (includeDefaults || ignoreAbove != Defaults.IGNORE_ABOVE) { + builder.field("ignore_above", ignoreAbove); + } + + if (includeDefaults || fieldType().nullValue() != null) { + builder.field("null_value", fieldType().nullValue()); + } + + if (includeDefaults || fieldType().splitQueriesOnWhitespace()) { + builder.field("split_queries_on_whitespace", fieldType().splitQueriesOnWhitespace()); + } + } +} diff --git a/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldParser.java b/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldParser.java new file mode 100644 index 00000000000..a855440b2be --- /dev/null +++ b/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldParser.java @@ -0,0 +1,167 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements.
Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.flattened.mapper; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParserUtils; +import org.elasticsearch.index.mapper.ContentPath; +import org.elasticsearch.index.mapper.MappedFieldType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * A helper class for {@link FlatObjectFieldMapper} that parses a JSON object + * and produces a pair of indexable fields for each leaf value. + */ +class FlatObjectFieldParser { + static final String SEPARATOR = "\0"; + private static final byte SEPARATOR_BYTE = '\0'; + + private final String rootFieldName; + private final String keyedFieldName; + + private final MappedFieldType fieldType; + private final int depthLimit; + private final int ignoreAbove; + + FlatObjectFieldParser(String rootFieldName, + String keyedFieldName, + MappedFieldType fieldType, + int depthLimit, + int ignoreAbove) { + this.rootFieldName = rootFieldName; + this.keyedFieldName = keyedFieldName; + this.fieldType = fieldType; + this.depthLimit = depthLimit; + this.ignoreAbove = ignoreAbove; + } + + /** Requires the parser to be positioned on START_OBJECT and returns the fields produced for the leaf values of that object. */ public List parse(XContentParser parser) throws IOException { + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, + parser.currentToken(), + parser::getTokenLocation); + + ContentPath path = new ContentPath(); + List fields = new ArrayList<>(); + + parseObject(parser, path, fields); + return fields; + } + + /** Consumes tokens until END_OBJECT, remembering the most recent field name and handing every other token to parseFieldValue. */ private void parseObject(XContentParser parser, + ContentPath path, + List fields) throws IOException { +
String currentName = null; + while (true) { + XContentParser.Token token = parser.nextToken(); + if (token == XContentParser.Token.END_OBJECT) { + return; + } + + if (token == XContentParser.Token.FIELD_NAME) { + currentName = parser.currentName(); + } else { + assert currentName != null; + parseFieldValue(token, parser, path, currentName, fields); + } + } + } + + /** Consumes tokens until END_ARRAY; every element is handled under the array's own field name. */ private void parseArray(XContentParser parser, + ContentPath path, + String currentName, + List fields) throws IOException { + while (true) { + XContentParser.Token token = parser.nextToken(); + if (token == XContentParser.Token.END_ARRAY) { + return; + } + parseFieldValue(token, parser, path, currentName, fields); + } + } + + /** Dispatches on the token: recurses into objects (after the depth check) and arrays, adds scalar values as text, and for nulls adds the configured null value if one is set. */ private void parseFieldValue(XContentParser.Token token, + XContentParser parser, + ContentPath path, + String currentName, + List fields) throws IOException { + if (token == XContentParser.Token.START_OBJECT) { + path.add(currentName); + validateDepthLimit(path); + parseObject(parser, path, fields); + path.remove(); + } else if (token == XContentParser.Token.START_ARRAY) { + parseArray(parser, path, currentName, fields); + } else if (token.isValue()) { + String value = parser.text(); + addField(path, currentName, value, fields); + } else if (token == XContentParser.Token.VALUE_NULL) { + if (fieldType.nullValueAsString() != null) { + addField(path, currentName, fieldType.nullValueAsString(), fields); + } + } else { + // Note that we throw an exception here just to be safe. We don't actually expect to reach + // this case, since XContentParser verifies that the input is well-formed as it parses.
+ throw new IllegalArgumentException("Encountered unexpected token [" + token.toString() + "]."); + } + } + + /** Adds the root and keyed fields for one leaf value. Values longer than ignoreAbove are skipped silently; a key containing the separator is rejected. */ private void addField(ContentPath path, + String currentName, + String value, + List fields) { + if (value.length() > ignoreAbove) { + return; + } + + String key = path.pathAsText(currentName); + if (key.contains(SEPARATOR)) { + throw new IllegalArgumentException("Keys in [flattened] fields cannot contain the reserved character \\0." + + " Offending key: [" + key + "]."); + } + String keyedValue = createKeyedValue(key, value); + + if (fieldType.indexOptions() != IndexOptions.NONE) { + fields.add(new StringField(rootFieldName, new BytesRef(value), Field.Store.NO)); + fields.add(new StringField(keyedFieldName, new BytesRef(keyedValue), Field.Store.NO)); + } + + if (fieldType.hasDocValues()) { + fields.add(new SortedSetDocValuesField(rootFieldName, new BytesRef(value))); + fields.add(new SortedSetDocValuesField(keyedFieldName, new BytesRef(keyedValue))); + } + } + + /** Throws IllegalArgumentException when path.length() + 1 is greater than depthLimit. */ private void validateDepthLimit(ContentPath path) { + if (path.length() + 1 > depthLimit) { + throw new IllegalArgumentException("The provided [flattened] field [" + rootFieldName +"]" + + " exceeds the maximum depth limit of [" + depthLimit + "]."); + } + } + + /** Joins key and value with the reserved separator. */ static String createKeyedValue(String key, String value) { + return key + SEPARATOR + value; + } + + /** Returns the bytes before the first separator byte (the whole value when there is none) as a BytesRef over the same backing array. */ static BytesRef extractKey(BytesRef keyedValue) { + int length; + for (length = 0; length < keyedValue.length; length++){ + if (keyedValue.bytes[keyedValue.offset + length] == SEPARATOR_BYTE) { + break; + } + } + return new BytesRef(keyedValue.bytes, keyedValue.offset, length); + } +} diff --git a/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/mapper/KeyedFlatObjectAtomicFieldData.java b/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/mapper/KeyedFlatObjectAtomicFieldData.java new file mode 100644 index 00000000000..7f5a5235095 --- /dev/null +++
b/x-pack/plugin/mapper-flattened/src/main/java/org/elasticsearch/xpack/flattened/mapper/KeyedFlatObjectAtomicFieldData.java
@@ -0,0 +1,251 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.flattened.mapper;
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues;
+import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData;
+import org.elasticsearch.index.fielddata.FieldData;
+import org.elasticsearch.index.fielddata.ScriptDocValues;
+import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
+import org.elasticsearch.index.fielddata.plain.AbstractAtomicOrdinalsFieldData;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Collection;
+
+/**
+ * The atomic field data implementation for {@link FlatObjectFieldMapper.KeyedFlatObjectFieldType}.
+ *
+ * This class wraps the field data that is built directly on the keyed flat object field,
+ * and filters out values whose prefix doesn't match the requested key.
+ *
+ * In order to support all usage patterns, the delegate's ordinal values are shifted
+ * to range from 0 to the number of matching values minus one.
+ */
+public class KeyedFlatObjectAtomicFieldData implements AtomicOrdinalsFieldData {
+
+    private final String key;
+    private final AtomicOrdinalsFieldData delegate;
+
+    KeyedFlatObjectAtomicFieldData(String key,
+                                   AtomicOrdinalsFieldData delegate) {
+        this.key = key;
+        this.delegate = delegate;
+    }
+
+    @Override
+    public long ramBytesUsed() {
+        return delegate.ramBytesUsed();
+    }
+
+    @Override
+    public Collection<Accountable> getChildResources() {
+        return delegate.getChildResources();
+    }
+
+    @Override
+    public SortedSetDocValues getOrdinalsValues() {
+        BytesRef keyBytes = new BytesRef(key);
+        SortedSetDocValues values = delegate.getOrdinalsValues();
+
+        long minOrd, maxOrd;
+        try {
+            minOrd = findMinOrd(keyBytes, values);
+            if (minOrd < 0) { // no term carries this key
+                return DocValues.emptySortedSet();
+            }
+            maxOrd = findMaxOrd(keyBytes, values);
+            assert maxOrd >= 0;
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+
+        return new KeyedJsonDocValues(keyBytes, values, minOrd, maxOrd);
+    }
+
+    @Override
+    public void close() {
+        delegate.close();
+    }
+
+    @Override
+    public ScriptDocValues<?> getScriptValues() {
+        return AbstractAtomicOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION
+            .apply(getOrdinalsValues());
+    }
+
+    @Override
+    public SortedBinaryDocValues getBytesValues() {
+        return FieldData.toString(getOrdinalsValues());
+    }
+
+    /**
+     * Performs a binary search to find the first term with 'key' as a prefix.
+     */
+    static long findMinOrd(BytesRef key, SortedSetDocValues delegate) throws IOException {
+        long low = 0;
+        long high = delegate.getValueCount() - 1;
+
+        long result = -1;
+        while (low <= high) {
+            long mid = (low + high) >>> 1;
+            final BytesRef term = delegate.lookupOrd(mid);
+            int cmp = compare(key, term);
+
+            if (cmp == 0) {
+                result = mid;
+                high = mid - 1; // keep searching to the left for an earlier match
+            } else if (cmp < 0) {
+                high = mid - 1;
+            } else {
+                low = mid + 1;
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Performs a binary search to find the last term with 'key' as a prefix.
+     */
+    static long findMaxOrd(BytesRef key, SortedSetDocValues delegate) throws IOException {
+        long low = 0;
+        long high = delegate.getValueCount() - 1;
+
+        long result = -1;
+        while (low <= high) {
+            long mid = (low + high) >>> 1;
+            final BytesRef term = delegate.lookupOrd(mid);
+            int cmp = compare(key, term);
+
+            if (cmp == 0) {
+                result = mid;
+                low = mid + 1; // keep searching to the right for a later match
+            } else if (cmp < 0) {
+                high = mid - 1;
+            } else {
+                low = mid + 1;
+            }
+        }
+        return result;
+    }
+
+    private static int compare(BytesRef key, BytesRef term) {
+        BytesRef extractedKey = FlatObjectFieldParser.extractKey(term);
+        return key.compareTo(extractedKey);
+    }
+
+    private static class KeyedJsonDocValues extends AbstractSortedSetDocValues {
+
+        private final BytesRef key;
+        private final SortedSetDocValues delegate;
+
+        /**
+         * The first and last ordinals whose term has 'key' as a prefix. These
+         * values must be non-negative (there is at least one matching term).
+         */
+        private final long minOrd;
+        private final long maxOrd;
+
+        /**
+         * We cache the first ordinal in a document to avoid unnecessary iterations
+         * through the delegate doc values. If no ordinal is cached for the current
+         * document, this value will be -1.
+         */
+        private long cachedNextOrd;
+
+        private KeyedJsonDocValues(BytesRef key,
+                                   SortedSetDocValues delegate,
+                                   long minOrd,
+                                   long maxOrd) {
+            assert minOrd >= 0 && maxOrd >= 0;
+            this.key = key;
+            this.delegate = delegate;
+            this.minOrd = minOrd;
+            this.maxOrd = maxOrd;
+            this.cachedNextOrd = -1;
+        }
+
+        @Override
+        public long getValueCount() {
+            return maxOrd - minOrd + 1;
+        }
+
+        /**
+         * Returns the (un-prefixed) term value for the requested ordinal.
+         *
+         * Note that this method can only be called on ordinals returned from {@link #nextOrd()}.
+         */
+        @Override
+        public BytesRef lookupOrd(long ord) throws IOException {
+            long delegateOrd = unmapOrd(ord);
+            BytesRef keyedValue = delegate.lookupOrd(delegateOrd);
+
+            int prefixLength = key.length + 1; // key bytes plus the one-byte separator
+            int valueLength = keyedValue.length - prefixLength;
+            return new BytesRef(keyedValue.bytes, keyedValue.offset + prefixLength, valueLength); // term may start at a non-zero offset
+        }
+
+        @Override
+        public long nextOrd() throws IOException {
+            if (cachedNextOrd >= 0) {
+                long nextOrd = cachedNextOrd;
+                cachedNextOrd = -1;
+                return mapOrd(nextOrd);
+            }
+
+            long ord = delegate.nextOrd();
+            if (ord != NO_MORE_ORDS && ord <= maxOrd) {
+                assert ord >= minOrd;
+                return mapOrd(ord);
+            } else {
+                return NO_MORE_ORDS;
+            }
+        }
+
+        @Override
+        public boolean advanceExact(int target) throws IOException {
+            if (delegate.advanceExact(target)) {
+                while (true) {
+                    long ord = delegate.nextOrd();
+                    if (ord == NO_MORE_ORDS || ord > maxOrd) {
+                        break;
+                    }
+
+                    if (ord >= minOrd) {
+                        cachedNextOrd = ord; // handed back by the next call to nextOrd()
+                        return true;
+                    }
+                }
+            }
+
+            cachedNextOrd = -1;
+            return false;
+        }
+
+        /**
+         * Maps an ordinal from the delegate doc values into the filtered ordinal space. The
+         * ordinal is shifted to lie in the range [0, (maxOrd - minOrd)].
+         */
+        private long mapOrd(long ord) {
+            assert minOrd <= ord && ord <= maxOrd;
+            return ord - minOrd;
+        }
+
+        /**
+         * Given a filtered ordinal in the range [0, (maxOrd - minOrd)], maps it into the
+         * delegate ordinal space.
+         */
+        private long unmapOrd(long ord) {
+            assert 0 <= ord && ord <= maxOrd - minOrd;
+            return ord + minOrd;
+        }
+    }
+}
diff --git a/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/index/mapper/FlatObjectFieldLookupTests.java b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/index/mapper/FlatObjectFieldLookupTests.java
new file mode 100644
index 00000000000..9f09d8bb669
--- /dev/null
+++ b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/index/mapper/FlatObjectFieldLookupTests.java
@@ -0,0 +1,188 @@
+/*
+ * Copyright Elasticsearch B.V.
and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.index.mapper;
+
+import org.elasticsearch.Version;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.fielddata.AtomicFieldData;
+import org.elasticsearch.index.fielddata.IndexFieldData;
+import org.elasticsearch.index.fielddata.ScriptDocValues;
+import org.elasticsearch.search.lookup.LeafDocLookup;
+import org.elasticsearch.search.lookup.SearchLookup;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.flattened.mapper.FlatObjectFieldMapper;
+import org.elasticsearch.xpack.flattened.mapper.FlatObjectFieldMapper.KeyedFlatObjectFieldType;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.function.Function;
+
+import static java.util.Collections.emptyList;
+import static java.util.Collections.singletonList;
+import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
+import static org.mockito.Matchers.anyObject;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class FlatObjectFieldLookupTests extends ESTestCase {
+
+    public void testFieldTypeLookup() {
+        String fieldName = "object1.object2.field";
+        FlatObjectFieldMapper mapper = createFlatObjectMapper(fieldName);
+
+        FieldTypeLookup lookup = new FieldTypeLookup()
+            .copyAndAddAll("type", singletonList(mapper), emptyList());
+        assertEquals(mapper.fieldType(), lookup.get(fieldName));
+
+        String objectKey = "key1.key2";
+        String searchFieldName = fieldName + "." + objectKey;
+
+        MappedFieldType searchFieldType = lookup.get(searchFieldName);
+        assertEquals(mapper.keyedFieldName(), searchFieldType.name());
+        assertThat(searchFieldType, instanceOf(KeyedFlatObjectFieldType.class));
+
+        KeyedFlatObjectFieldType keyedFieldType = (KeyedFlatObjectFieldType) searchFieldType;
+        assertEquals(objectKey, keyedFieldType.key());
+    }
+
+    public void testFieldTypeLookupWithAlias() {
+        String fieldName = "object1.object2.field";
+        FlatObjectFieldMapper mapper = createFlatObjectMapper(fieldName);
+
+        String aliasName = "alias";
+        FieldAliasMapper alias = new FieldAliasMapper(aliasName, aliasName, fieldName);
+
+        FieldTypeLookup lookup = new FieldTypeLookup()
+            .copyAndAddAll("type", singletonList(mapper), singletonList(alias));
+        assertEquals(mapper.fieldType(), lookup.get(aliasName));
+
+        String objectKey = "key1.key2";
+        String searchFieldName = aliasName + "." + objectKey;
+
+        MappedFieldType searchFieldType = lookup.get(searchFieldName);
+        assertEquals(mapper.keyedFieldName(), searchFieldType.name());
+        assertThat(searchFieldType, instanceOf(KeyedFlatObjectFieldType.class));
+
+        KeyedFlatObjectFieldType keyedFieldType = (KeyedFlatObjectFieldType) searchFieldType;
+        assertEquals(objectKey, keyedFieldType.key());
+    }
+
+    public void testFieldTypeLookupWithMultipleFields() {
+        String field1 = "object1.object2.field";
+        String field2 = "object1.field";
+        String field3 = "object2.field";
+
+        FlatObjectFieldMapper mapper1 = createFlatObjectMapper(field1);
+        FlatObjectFieldMapper mapper2 = createFlatObjectMapper(field2);
+        FlatObjectFieldMapper mapper3 = createFlatObjectMapper(field3);
+
+        FieldTypeLookup lookup = new FieldTypeLookup()
+            .copyAndAddAll("type", Arrays.asList(mapper1, mapper2), emptyList());
+        assertNotNull(lookup.get(field1 + ".some.key"));
+        assertNotNull(lookup.get(field2 + ".some.key"));
+
+        lookup = lookup.copyAndAddAll("type", singletonList(mapper3), emptyList());
+        assertNotNull(lookup.get(field1 + ".some.key"));
+        assertNotNull(lookup.get(field2 + ".some.key"));
+        assertNotNull(lookup.get(field3 + ".some.key"));
+    }
+
+    public void testMaxDynamicKeyDepth() {
+        FieldTypeLookup lookup = new FieldTypeLookup();
+        assertEquals(0, lookup.maxKeyedLookupDepth());
+
+        // Add a flattened object field.
+        String flatObjectName = "object1.object2.field";
+        FlatObjectFieldMapper flatObjectField = createFlatObjectMapper(flatObjectName);
+        lookup = lookup.copyAndAddAll("type", singletonList(flatObjectField), emptyList());
+        assertEquals(3, lookup.maxKeyedLookupDepth());
+
+        // Add a short alias to that field.
+        String aliasName = "alias";
+        FieldAliasMapper alias = new FieldAliasMapper(aliasName, aliasName, flatObjectName);
+        lookup = lookup.copyAndAddAll("type", emptyList(), singletonList(alias));
+        assertEquals(3, lookup.maxKeyedLookupDepth());
+
+        // Add a longer alias to that field.
+        String longAliasName = "object1.object2.object3.alias";
+        FieldAliasMapper longAlias = new FieldAliasMapper(longAliasName, longAliasName, flatObjectName);
+        lookup = lookup.copyAndAddAll("type", emptyList(), singletonList(longAlias));
+        assertEquals(4, lookup.maxKeyedLookupDepth());
+
+        // Update the long alias to refer to a non-flattened object field.
+        String fieldName = "field";
+        MockFieldMapper field = new MockFieldMapper(fieldName);
+        longAlias = new FieldAliasMapper(longAliasName, longAliasName, fieldName);
+        lookup = lookup.copyAndAddAll("type", singletonList(field), singletonList(longAlias));
+        assertEquals(3, lookup.maxKeyedLookupDepth());
+    }
+
+    public void testFieldLookupIterator() {
+        MockFieldMapper mapper = new MockFieldMapper("foo");
+        FlatObjectFieldMapper flatObjectMapper = createFlatObjectMapper("object1.object2.field");
+
+        FieldTypeLookup lookup = new FieldTypeLookup()
+            .copyAndAddAll("type", Arrays.asList(mapper, flatObjectMapper), emptyList());
+
+        Set<String> fieldNames = new HashSet<>();
+        for (MappedFieldType fieldType : lookup) {
+            fieldNames.add(fieldType.name());
+        }
+
+        assertThat(fieldNames, containsInAnyOrder(
+            mapper.name(), flatObjectMapper.name(), flatObjectMapper.keyedFieldName()));
+    }
+
+    private FlatObjectFieldMapper createFlatObjectMapper(String fieldName) {
+        Settings settings = Settings.builder()
+            .put("index.version.created", Version.CURRENT)
+            .build();
+        Mapper.BuilderContext context = new Mapper.BuilderContext(settings, new ContentPath());
+        return new FlatObjectFieldMapper.Builder(fieldName).build(context);
+    }
+
+    public void testScriptDocValuesLookup() {
+        MapperService mapperService = mock(MapperService.class);
+
+        ScriptDocValues<?> docValues1 = mock(ScriptDocValues.class);
+        IndexFieldData<?> fieldData1 = createFieldData(docValues1);
+
+        ScriptDocValues<?> docValues2 = mock(ScriptDocValues.class);
+        IndexFieldData<?> fieldData2 = createFieldData(docValues2);
+
+        KeyedFlatObjectFieldType fieldType1 = new KeyedFlatObjectFieldType("key1");
+        when(mapperService.fullName("json.key1")).thenReturn(fieldType1);
+
+        KeyedFlatObjectFieldType fieldType2 = new KeyedFlatObjectFieldType("key2");
+        when(mapperService.fullName("json.key2")).thenReturn(fieldType2);
+
+        Function<MappedFieldType, IndexFieldData<?>> fieldDataSupplier = fieldType -> {
+            KeyedFlatObjectFieldType keyedFieldType = (KeyedFlatObjectFieldType) fieldType;
+            return keyedFieldType.key().equals("key1") ? fieldData1 : fieldData2;
+        };
+
+        SearchLookup searchLookup = new SearchLookup(mapperService, fieldDataSupplier, new String[] {"_doc"});
+        LeafDocLookup docLookup = searchLookup.doc().getLeafDocLookup(null);
+
+        assertEquals(docValues1, docLookup.get("json.key1"));
+        assertEquals(docValues2, docLookup.get("json.key2"));
+    }
+
+    private IndexFieldData<?> createFieldData(ScriptDocValues<?> scriptDocValues) {
+        AtomicFieldData atomicFieldData = mock(AtomicFieldData.class);
+        doReturn(scriptDocValues).when(atomicFieldData).getScriptValues();
+
+        IndexFieldData<?> fieldData = mock(IndexFieldData.class);
+        when(fieldData.getFieldName()).thenReturn("field");
+        doReturn(atomicFieldData).when(fieldData).load(anyObject());
+
+        return fieldData;
+    }
+}
diff --git a/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldMapperTests.java b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldMapperTests.java
new file mode 100644
index 00000000000..71cdfa06938
--- /dev/null
+++ b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldMapperTests.java
@@ -0,0 +1,457 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.flattened.mapper;
+
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.bytes.BytesArray;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.compress.CompressedXContent;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.index.IndexService;
+import org.elasticsearch.index.mapper.DocumentMapper;
+import org.elasticsearch.index.mapper.DocumentMapperParser;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
+import org.elasticsearch.index.mapper.MapperParsingException;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.ParsedDocument;
+import org.elasticsearch.index.mapper.SourceToParse;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.test.ESSingleNodeTestCase;
+import org.elasticsearch.xpack.core.XPackPlugin;
+import org.elasticsearch.xpack.flattened.FlattenedMapperPlugin;
+import org.elasticsearch.xpack.flattened.mapper.FlatObjectFieldMapper.KeyedFlatObjectFieldType;
+import org.elasticsearch.xpack.flattened.mapper.FlatObjectFieldMapper.RootFlatObjectFieldType;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+
+import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
+import static org.hamcrest.Matchers.equalTo;
+
+public class FlatObjectFieldMapperTests extends ESSingleNodeTestCase {
+    private IndexService indexService;
+    private DocumentMapperParser parser;
+
+    @Before
+    public void setup() {
+        indexService = createIndex("test");
+        parser = indexService.mapperService().documentMapperParser();
+    }
+
+    @Override
+    protected Collection<Class<? extends Plugin>> getPlugins() {
+        return pluginList(FlattenedMapperPlugin.class, XPackPlugin.class);
+    }
+
+    public void testDefaults() throws Exception {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        BytesReference doc = BytesReference.bytes(XContentFactory.jsonBuilder().startObject()
+            .startObject("field")
+                .field("key", "value")
+            .endObject()
+        .endObject());
+
+        ParsedDocument parsedDoc = mapper.parse(new SourceToParse("test", "type", "1", doc, XContentType.JSON));
+
+        // Check the root fields.
+        IndexableField[] fields = parsedDoc.rootDoc().getFields("field");
+        assertEquals(2, fields.length);
+
+        assertEquals("field", fields[0].name());
+        assertEquals(new BytesRef("value"), fields[0].binaryValue());
+        assertFalse(fields[0].fieldType().stored());
+        assertTrue(fields[0].fieldType().omitNorms());
+        assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType());
+
+        assertEquals("field", fields[1].name());
+        assertEquals(new BytesRef("value"), fields[1].binaryValue());
+        assertEquals(DocValuesType.SORTED_SET, fields[1].fieldType().docValuesType());
+
+        // Check the keyed fields.
+        IndexableField[] keyedFields = parsedDoc.rootDoc().getFields("field._keyed");
+        assertEquals(2, keyedFields.length);
+
+        assertEquals("field._keyed", keyedFields[0].name());
+        assertEquals(new BytesRef("key\0value"), keyedFields[0].binaryValue());
+        assertFalse(keyedFields[0].fieldType().stored());
+        assertTrue(keyedFields[0].fieldType().omitNorms());
+        assertEquals(DocValuesType.NONE, keyedFields[0].fieldType().docValuesType());
+
+        assertEquals("field._keyed", keyedFields[1].name());
+        assertEquals(new BytesRef("key\0value"), keyedFields[1].binaryValue());
+        assertEquals(DocValuesType.SORTED_SET, keyedFields[1].fieldType().docValuesType());
+
+        // Check that there is no 'field names' field.
+        IndexableField[] fieldNamesFields = parsedDoc.rootDoc().getFields(FieldNamesFieldMapper.NAME);
+        assertEquals(0, fieldNamesFields.length);
+    }
+
+    public void testDisableIndex() throws Exception {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                        .field("index", false)
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        BytesReference doc = BytesReference.bytes(XContentFactory.jsonBuilder().startObject()
+            .startObject("field")
+                .field("key", "value")
+            .endObject()
+        .endObject());
+
+        ParsedDocument parsedDoc = mapper.parse(new SourceToParse("test", "type", "1", doc, XContentType.JSON));
+
+        IndexableField[] fields = parsedDoc.rootDoc().getFields("field");
+        assertEquals(1, fields.length);
+        assertEquals(DocValuesType.SORTED_SET, fields[0].fieldType().docValuesType());
+
+        IndexableField[] keyedFields = parsedDoc.rootDoc().getFields("field._keyed");
+        assertEquals(1, keyedFields.length);
+        assertEquals(DocValuesType.SORTED_SET, keyedFields[0].fieldType().docValuesType());
+    }
+
+    public void testDisableDocValues() throws Exception {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                        .field("doc_values", false)
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        BytesReference doc = BytesReference.bytes(XContentFactory.jsonBuilder().startObject()
+            .startObject("field")
+                .field("key", "value")
+            .endObject()
+        .endObject());
+
+        ParsedDocument parsedDoc = mapper.parse(new SourceToParse("test", "type", "1", doc, XContentType.JSON));
+
+        IndexableField[] fields = parsedDoc.rootDoc().getFields("field");
+        assertEquals(1, fields.length);
+        assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType());
+
+        IndexableField[] keyedFields = parsedDoc.rootDoc().getFields("field._keyed");
+        assertEquals(1, keyedFields.length);
+        assertEquals(DocValuesType.NONE, keyedFields[0].fieldType().docValuesType());
+
+        IndexableField[] fieldNamesFields = parsedDoc.rootDoc().getFields(FieldNamesFieldMapper.NAME);
+        assertEquals(1, fieldNamesFields.length);
+        assertEquals("field", fieldNamesFields[0].stringValue());
+    }
+
+    public void testEnableStore() throws Exception {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                        .field("store", true)
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject());
+
+        expectThrows(UnsupportedOperationException.class, () ->
+            parser.parse("type", new CompressedXContent(mapping)));
+    }
+
+    public void testIndexOptions() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                        .field("index_options", "freqs")
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        for (String indexOptions : Arrays.asList("positions", "offsets")) {
+            String invalidMapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+                .startObject("type")
+                    .startObject("properties")
+                        .startObject("field")
+                            .field("type", "flattened")
+                            .field("index_options", indexOptions)
+                        .endObject()
+                    .endObject()
+                .endObject()
+            .endObject());
+            IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+                () -> parser.parse("type", new CompressedXContent(invalidMapping)));
+            assertEquals("The [flattened] field does not support positions, got [index_options]=" + indexOptions, e.getMessage());
+        }
+    }
+
+    public void testNullField() throws Exception {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        BytesReference doc = BytesReference.bytes(XContentFactory.jsonBuilder().startObject()
+            .nullField("field")
+        .endObject());
+
+        ParsedDocument parsedDoc = mapper.parse(new SourceToParse("test", "type", "1", doc, XContentType.JSON));
+        IndexableField[] fields = parsedDoc.rootDoc().getFields("field");
+        assertEquals(0, fields.length);
+    }
+
+    public void testMalformedJson() throws Exception {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        BytesReference doc1 = BytesReference.bytes(XContentFactory.jsonBuilder().startObject()
+            .field("field", "not a JSON object")
+        .endObject());
+
+        expectThrows(MapperParsingException.class, () -> mapper.parse(
+            new SourceToParse("test", "type", "1", doc1, XContentType.JSON)));
+
+        BytesReference doc2 = new BytesArray("{ \"field\": { \"key\": \"value\" ");
+        expectThrows(MapperParsingException.class, () -> mapper.parse(
+            new SourceToParse("test", "type", "1", doc2, XContentType.JSON)));
+    }
+
+    public void testFieldMultiplicity() throws Exception {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        BytesReference doc = BytesReference.bytes(XContentFactory.jsonBuilder().startObject()
+            .startArray("field")
+                .startObject()
+                    .field("key1", "value")
+                .endObject()
+                .startObject()
+                    .field("key2", true)
+                    .field("key3", false)
+                .endObject()
+            .endArray()
+        .endObject());
+
+        ParsedDocument parsedDoc = mapper.parse(new SourceToParse("test", "type", "1", doc, XContentType.JSON));
+
+        IndexableField[] fields = parsedDoc.rootDoc().getFields("field");
+        assertEquals(6, fields.length);
+        assertEquals(new BytesRef("value"), fields[0].binaryValue());
+        assertEquals(new BytesRef("true"), fields[2].binaryValue());
+        assertEquals(new BytesRef("false"), fields[4].binaryValue());
+
+        IndexableField[] keyedFields = parsedDoc.rootDoc().getFields("field._keyed");
+        assertEquals(6, keyedFields.length);
+        assertEquals(new BytesRef("key1\0value"), keyedFields[0].binaryValue());
+        assertEquals(new BytesRef("key2\0true"), keyedFields[2].binaryValue());
+        assertEquals(new BytesRef("key3\0false"), keyedFields[4].binaryValue());
+    }
+
+    public void testDepthLimit() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                        .field("depth_limit", 2)
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        BytesReference doc = BytesReference.bytes(XContentFactory.jsonBuilder().startObject()
+            .startObject("field")
+                .startObject("key1")
+                    .startObject("key2")
+                        .field("key3", "value")
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject());
+
+        expectThrows(MapperParsingException.class, () ->
+            mapper.parse(new SourceToParse("test", "type", "1", doc, XContentType.JSON)));
+    }
+
+    public void testEagerGlobalOrdinals() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                        .field("eager_global_ordinals", true)
+                    .endObject().endObject()
+            .endObject().endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        FieldMapper fieldMapper = (FieldMapper) mapper.mappers().getMapper("field");
+        assertTrue(fieldMapper.fieldType().eagerGlobalOrdinals());
+    }
+
+    public void testIgnoreAbove() throws IOException {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                        .field("ignore_above", 10)
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        BytesReference doc = BytesReference.bytes(XContentFactory.jsonBuilder().startObject()
+            .startArray("field")
+                .startObject()
+                    .field("key", "a longer than usual value")
+                .endObject()
+            .endArray()
+        .endObject());
+
+        ParsedDocument parsedDoc = mapper.parse(new SourceToParse("test", "type", "1", doc, XContentType.JSON));
+        IndexableField[] fields = parsedDoc.rootDoc().getFields("field");
+        assertEquals(0, fields.length);
+    }
+
+    public void testNullValues() throws Exception {
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                    .endObject()
+                    .startObject("other_field")
+                        .field("type", "flattened")
+                        .field("null_value", "placeholder")
+                    .endObject()
+                .endObject()
+            .endObject()
+        .endObject());
+
+        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
+        assertEquals(mapping, mapper.mappingSource().toString());
+
+        BytesReference doc = BytesReference.bytes(XContentFactory.jsonBuilder().startObject()
+            .startObject("field")
+                .nullField("key")
+            .endObject()
+            .startObject("other_field")
+                .nullField("key")
+            .endObject()
+        .endObject());
+        ParsedDocument parsedDoc = mapper.parse(new SourceToParse("test", "type", "1", doc, XContentType.JSON));
+
+        IndexableField[] fields = parsedDoc.rootDoc().getFields("field");
+        assertEquals(0, fields.length);
+
+        IndexableField[] otherFields = parsedDoc.rootDoc().getFields("other_field");
+        assertEquals(2, otherFields.length);
+        assertEquals(new BytesRef("placeholder"), otherFields[0].binaryValue());
+        assertEquals(new BytesRef("placeholder"), otherFields[1].binaryValue());
+
+        IndexableField[] prefixedOtherFields = parsedDoc.rootDoc().getFields("other_field._keyed");
+        assertEquals(2, prefixedOtherFields.length);
+        assertEquals(new BytesRef("key\0placeholder"), prefixedOtherFields[0].binaryValue());
+        assertEquals(new BytesRef("key\0placeholder"), prefixedOtherFields[1].binaryValue());
+    }
+
+    public void testSplitQueriesOnWhitespace() throws IOException {
+        MapperService mapperService = indexService.mapperService();
+        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
+            .startObject("type")
+                .startObject("properties")
+                    .startObject("field")
+                        .field("type", "flattened")
+                        .field("split_queries_on_whitespace", true)
+                    .endObject()
+                .endObject()
+            .endObject().endObject());
+        mapperService.merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE);
+
+        RootFlatObjectFieldType rootFieldType = (RootFlatObjectFieldType) mapperService.fullName("field");
+        assertThat(rootFieldType.searchAnalyzer().name(), equalTo("whitespace"));
+        assertTokenStreamContents(rootFieldType.searchAnalyzer().analyzer().tokenStream("", "Hello World"),
+            new String[] {"Hello", "World"});
+
+        KeyedFlatObjectFieldType keyedFieldType = (KeyedFlatObjectFieldType) mapperService.fullName("field.key");
+        assertThat(keyedFieldType.searchAnalyzer().name(), equalTo("whitespace"));
+        assertTokenStreamContents(keyedFieldType.searchAnalyzer().analyzer().tokenStream("", "Hello World"),
+            new String[] {"Hello", "World"});
+    }
+}
diff --git a/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldParserTests.java b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldParserTests.java
new file mode 100644
index 00000000000..ff4de0d85ce
--- /dev/null
+++ b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectFieldParserTests.java
@@ -0,0 +1,318 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements.
Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.flattened.mapper; + +import com.fasterxml.jackson.core.JsonParseException; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.common.xcontent.json.JsonXContent; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.MockFieldMapper.FakeFieldType; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.XContentTestUtils; +import org.junit.Before; + +import java.io.IOException; +import java.util.List; + +public class FlatObjectFieldParserTests extends ESTestCase { + private FlatObjectFieldParser parser; + + @Before + public void setUp() throws Exception { + super.setUp(); + parser = new FlatObjectFieldParser("field", "field._keyed", + new FakeFieldType(), + Integer.MAX_VALUE, + Integer.MAX_VALUE); + } + + public void testTextValues() throws Exception { + String input = "{ \"key1\": \"value1\", \"key2\": \"value2\" }"; + XContentParser xContentParser = createXContentParser(input); + + List<IndexableField> fields = parser.parse(xContentParser); + assertEquals(4, fields.size()); + + IndexableField field1 = fields.get(0); + assertEquals("field", field1.name()); + assertEquals(new BytesRef("value1"), field1.binaryValue()); + + IndexableField keyedField1 = fields.get(1); + assertEquals("field._keyed", keyedField1.name()); + assertEquals(new BytesRef("key1\0value1"), keyedField1.binaryValue()); + + IndexableField field2 = fields.get(2); + assertEquals("field", field2.name()); + assertEquals(new BytesRef("value2"), field2.binaryValue()); + + IndexableField keyedField2 = fields.get(3); + 
assertEquals("field._keyed", keyedField2.name()); + assertEquals(new BytesRef("key2\0value2"), keyedField2.binaryValue()); + } + + public void testNumericValues() throws Exception { + String input = "{ \"key\": 2.718 }"; + XContentParser xContentParser = createXContentParser(input); + + List<IndexableField> fields = parser.parse(xContentParser); + assertEquals(2, fields.size()); + + IndexableField field = fields.get(0); + assertEquals("field", field.name()); + assertEquals(new BytesRef("2.718"), field.binaryValue()); + + IndexableField keyedField = fields.get(1); + assertEquals("field._keyed", keyedField.name()); + assertEquals(new BytesRef("key" + '\0' + "2.718"), keyedField.binaryValue()); + } + + public void testBooleanValues() throws Exception { + String input = "{ \"key\": false }"; + XContentParser xContentParser = createXContentParser(input); + + List<IndexableField> fields = parser.parse(xContentParser); + assertEquals(2, fields.size()); + + IndexableField field = fields.get(0); + assertEquals("field", field.name()); + assertEquals(new BytesRef("false"), field.binaryValue()); + + IndexableField keyedField = fields.get(1); + assertEquals("field._keyed", keyedField.name()); + assertEquals(new BytesRef("key\0false"), keyedField.binaryValue()); + } + + public void testBasicArrays() throws Exception { + String input = "{ \"key\": [true, false] }"; + XContentParser xContentParser = createXContentParser(input); + + List<IndexableField> fields = parser.parse(xContentParser); + assertEquals(4, fields.size()); + + IndexableField field1 = fields.get(0); + assertEquals("field", field1.name()); + assertEquals(new BytesRef("true"), field1.binaryValue()); + + IndexableField keyedField1 = fields.get(1); + assertEquals("field._keyed", keyedField1.name()); + assertEquals(new BytesRef("key\0true"), keyedField1.binaryValue()); + + IndexableField field2 = fields.get(2); + assertEquals("field", field2.name()); + assertEquals(new BytesRef("false"), field2.binaryValue()); + + IndexableField keyedField2 = fields.get(3); + 
assertEquals("field._keyed", keyedField2.name()); + assertEquals(new BytesRef("key\0false"), keyedField2.binaryValue()); + } + + public void testArrayOfArrays() throws Exception { + String input = "{ \"key\": [[true, \"value\"], 3] }"; + XContentParser xContentParser = createXContentParser(input); + + List<IndexableField> fields = parser.parse(xContentParser); + assertEquals(6, fields.size()); + + IndexableField field1 = fields.get(0); + assertEquals("field", field1.name()); + assertEquals(new BytesRef("true"), field1.binaryValue()); + + IndexableField keyedField1 = fields.get(1); + assertEquals("field._keyed", keyedField1.name()); + assertEquals(new BytesRef("key\0true"), keyedField1.binaryValue()); + + IndexableField field2 = fields.get(2); + assertEquals("field", field2.name()); + assertEquals(new BytesRef("value"), field2.binaryValue()); + + IndexableField keyedField2 = fields.get(3); + assertEquals("field._keyed", keyedField2.name()); + assertEquals(new BytesRef("key\0value"), keyedField2.binaryValue()); + + IndexableField field3 = fields.get(4); + assertEquals("field", field3.name()); + assertEquals(new BytesRef("3"), field3.binaryValue()); + + IndexableField keyedField3 = fields.get(5); + assertEquals("field._keyed", keyedField3.name()); + assertEquals(new BytesRef("key" + "\0" + "3"), keyedField3.binaryValue()); + } + + public void testArraysOfObjects() throws Exception { + String input = "{ \"key1\": [{ \"key2\": true }, false], \"key4\": \"other\" }"; + XContentParser xContentParser = createXContentParser(input); + + List<IndexableField> fields = parser.parse(xContentParser); + assertEquals(6, fields.size()); + + IndexableField field1 = fields.get(0); + assertEquals("field", field1.name()); + assertEquals(new BytesRef("true"), field1.binaryValue()); + + IndexableField keyedField1 = fields.get(1); + assertEquals("field._keyed", keyedField1.name()); + assertEquals(new BytesRef("key1.key2\0true"), keyedField1.binaryValue()); + + IndexableField field2 = fields.get(2); + 
assertEquals("field", field2.name()); + assertEquals(new BytesRef("false"), field2.binaryValue()); + + IndexableField keyedField2 = fields.get(3); + assertEquals("field._keyed", keyedField2.name()); + assertEquals(new BytesRef("key1\0false"), keyedField2.binaryValue()); + + IndexableField field3 = fields.get(4); + assertEquals("field", field3.name()); + assertEquals(new BytesRef("other"), field3.binaryValue()); + + IndexableField keyedField3 = fields.get(5); + assertEquals("field._keyed", keyedField3.name()); + assertEquals(new BytesRef("key4\0other"), keyedField3.binaryValue()); + } + + public void testNestedObjects() throws Exception { + String input = "{ \"parent1\": { \"key\" : \"value\" }," + + "\"parent2\": { \"key\" : \"value\" }}"; + XContentParser xContentParser = createXContentParser(input); + + List<IndexableField> fields = parser.parse(xContentParser); + assertEquals(4, fields.size()); + + IndexableField field1 = fields.get(0); + assertEquals("field", field1.name()); + assertEquals(new BytesRef("value"), field1.binaryValue()); + + IndexableField keyedField1 = fields.get(1); + assertEquals("field._keyed", keyedField1.name()); + assertEquals(new BytesRef("parent1.key\0value"), keyedField1.binaryValue()); + + IndexableField field2 = fields.get(2); + assertEquals("field", field2.name()); + assertEquals(new BytesRef("value"), field2.binaryValue()); + + IndexableField keyedField2 = fields.get(3); + assertEquals("field._keyed", keyedField2.name()); + assertEquals(new BytesRef("parent2.key\0value"), keyedField2.binaryValue()); + } + + public void testDepthLimit() throws Exception { + String input = "{ \"parent1\": { \"key\" : \"value\" }," + + "\"parent2\": [{ \"key\" : { \"key\" : \"value\" }}]}"; + XContentParser xContentParser = createXContentParser(input); + FlatObjectFieldParser configuredParser = new FlatObjectFieldParser("field", "field._keyed", + new FakeFieldType(), 2, Integer.MAX_VALUE); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + 
() -> configuredParser.parse(xContentParser)); + assertEquals("The provided [flattened] field [field] exceeds the maximum depth limit of [2].", e.getMessage()); + } + + public void testDepthLimitBoundary() throws Exception { + String input = "{ \"parent1\": { \"key\" : \"value\" }," + + "\"parent2\": [{ \"key\" : { \"key\" : \"value\" }}]}"; + XContentParser xContentParser = createXContentParser(input); + FlatObjectFieldParser configuredParser = new FlatObjectFieldParser("field", "field._keyed", + new FakeFieldType(), 3, Integer.MAX_VALUE); + + List<IndexableField> fields = configuredParser.parse(xContentParser); + assertEquals(4, fields.size()); + } + + public void testIgnoreAbove() throws Exception { + String input = "{ \"key\": \"a longer field than usual\" }"; + XContentParser xContentParser = createXContentParser(input); + FlatObjectFieldParser configuredParser = new FlatObjectFieldParser("field", "field._keyed", + new FakeFieldType(), Integer.MAX_VALUE, 10); + + List<IndexableField> fields = configuredParser.parse(xContentParser); + assertEquals(0, fields.size()); + } + + public void testNullValues() throws Exception { + String input = "{ \"key\": null}"; + XContentParser xContentParser = createXContentParser(input); + + List<IndexableField> fields = parser.parse(xContentParser); + assertEquals(0, fields.size()); + + xContentParser = createXContentParser(input); + + MappedFieldType fieldType = new FakeFieldType(); + fieldType.setNullValue("placeholder"); + FlatObjectFieldParser configuredParser = new FlatObjectFieldParser("field", "field._keyed", + fieldType, Integer.MAX_VALUE, Integer.MAX_VALUE); + + fields = configuredParser.parse(xContentParser); + assertEquals(2, fields.size()); + + IndexableField field = fields.get(0); + assertEquals("field", field.name()); + assertEquals(new BytesRef("placeholder"), field.binaryValue()); + + IndexableField keyedField = fields.get(1); + assertEquals("field._keyed", keyedField.name()); + assertEquals(new BytesRef("key\0placeholder"), keyedField.binaryValue()); + } + + 
public void testMalformedJson() throws Exception { + String input = "{ \"key\": [true, false }"; + XContentParser xContentParser = createXContentParser(input); + + expectThrows(JsonParseException.class, () -> parser.parse(xContentParser)); + } + + public void testEmptyObject() throws Exception { + String input = "{}"; + XContentParser xContentParser = createXContentParser(input); + + List<IndexableField> fields = parser.parse(xContentParser); + assertEquals(0, fields.size()); + } + + public void testRandomFields() throws Exception { + BytesReference input = BytesReference.bytes( + XContentBuilder.builder(JsonXContent.jsonXContent) + .startObject() + .startObject("object") + .field("key", "value") + .endObject() + .startArray("array") + .value(2.718) + .endArray() + .endObject()); + + input = XContentTestUtils.insertRandomFields(XContentType.JSON, input, null, random()); + XContentParser xContentParser = createXContentParser(input.utf8ToString()); + + List<IndexableField> fields = parser.parse(xContentParser); + assertTrue(fields.size() > 4); + } + + public void testReservedCharacters() throws Exception { + BytesReference input = BytesReference.bytes( + XContentBuilder.builder(JsonXContent.jsonXContent) + .startObject() + .field("k\0y", "value") + .endObject()); + XContentParser xContentParser = createXContentParser(input.utf8ToString()); + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> parser.parse(xContentParser)); + assertEquals("Keys in [flattened] fields cannot contain the reserved character \\0. 
Offending key: [k\0y].", + e.getMessage()); + } + + private XContentParser createXContentParser(String input) throws IOException { + XContentParser xContentParser = createParser(JsonXContent.jsonXContent, input); + xContentParser.nextToken(); + return xContentParser; + } +} diff --git a/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectIndexFieldDataTests.java b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectIndexFieldDataTests.java new file mode 100644 index 00000000000..97ca7d4d88a --- /dev/null +++ b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectIndexFieldDataTests.java @@ -0,0 +1,106 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.flattened.mapper; + +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexFieldDataCache; +import org.elasticsearch.index.fielddata.IndexFieldDataService; +import org.elasticsearch.index.mapper.ContentPath; +import org.elasticsearch.index.mapper.Mapper; +import org.elasticsearch.index.shard.ShardId; +import 
org.elasticsearch.indices.IndicesService; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.xpack.core.XPackPlugin; +import org.elasticsearch.xpack.flattened.FlattenedMapperPlugin; +import org.elasticsearch.xpack.flattened.mapper.FlatObjectFieldMapper.KeyedFlatObjectFieldData; +import org.elasticsearch.xpack.flattened.mapper.FlatObjectFieldMapper.KeyedFlatObjectFieldType; + +import java.io.IOException; +import java.util.Collection; +import java.util.concurrent.atomic.AtomicInteger; + +public class FlatObjectIndexFieldDataTests extends ESSingleNodeTestCase { + + @Override + protected Collection<Class<? extends Plugin>> getPlugins() { + return pluginList(FlattenedMapperPlugin.class, XPackPlugin.class); + } + + public void testGlobalFieldDataCaching() throws IOException { + // Set up the index service. + IndexService indexService = createIndex("test"); + IndicesService indicesService = getInstanceFromNode(IndicesService.class); + IndexFieldDataService ifdService = new IndexFieldDataService(indexService.getIndexSettings(), + indicesService.getIndicesFieldDataCache(), + indicesService.getCircuitBreakerService(), + indexService.mapperService()); + + Mapper.BuilderContext ctx = new Mapper.BuilderContext(indexService.getIndexSettings().getSettings(), new ContentPath(1)); + FlatObjectFieldMapper fieldMapper = new FlatObjectFieldMapper.Builder("json").build(ctx); + + AtomicInteger onCacheCalled = new AtomicInteger(); + ifdService.setListener(new IndexFieldDataCache.Listener() { + @Override + public void onCache(ShardId shardId, String fieldName, Accountable ramUsage) { + assertEquals(fieldMapper.keyedFieldName(), fieldName); + onCacheCalled.incrementAndGet(); + } + }); + + // Add some documents. 
+ Directory directory = LuceneTestCase.newDirectory(); + IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer()); + IndexWriter writer = new IndexWriter(directory, config); + + Document doc = new Document(); + doc.add(new SortedSetDocValuesField("json._keyed", new BytesRef("some_key\0some_value"))); + writer.addDocument(doc); + writer.commit(); + writer.addDocument(doc); + DirectoryReader reader = ElasticsearchDirectoryReader.wrap( + DirectoryReader.open(writer), + new ShardId("test", "_na_", 1)); + + // Load global field data for subfield 'key'. + KeyedFlatObjectFieldType fieldType1 = fieldMapper.keyedFieldType("key"); + IndexFieldData ifd1 = ifdService.getForField(fieldType1); + assertTrue(ifd1 instanceof KeyedFlatObjectFieldData); + + KeyedFlatObjectFieldData fieldData1 = (KeyedFlatObjectFieldData) ifd1; + assertEquals("key", fieldData1.getKey()); + fieldData1.loadGlobal(reader); + assertEquals(1, onCacheCalled.get()); + + // Load global field data for the subfield 'other_key'. + KeyedFlatObjectFieldType fieldType2 = fieldMapper.keyedFieldType("other_key"); + IndexFieldData ifd2 = ifdService.getForField(fieldType2); + assertTrue(ifd2 instanceof KeyedFlatObjectFieldData); + + KeyedFlatObjectFieldData fieldData2 = (KeyedFlatObjectFieldData) ifd2; + assertEquals("other_key", fieldData2.getKey()); + fieldData2.loadGlobal(reader); + assertEquals(1, onCacheCalled.get()); + + ifdService.clear(); + reader.close(); + writer.close(); + directory.close(); + } +} diff --git a/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectSearchTests.java b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectSearchTests.java new file mode 100644 index 00000000000..373d1d16d84 --- /dev/null +++ b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/FlatObjectSearchTests.java @@ -0,0 +1,512 @@ +/* + * Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +package org.elasticsearch.xpack.flattened.mapper; + +import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; +import org.elasticsearch.common.document.DocumentField; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory; +import org.elasticsearch.search.aggregations.metrics.Cardinality; +import org.elasticsearch.search.sort.FieldSortBuilder; +import org.elasticsearch.search.sort.SortOrder; +import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.xpack.core.XPackPlugin; +import org.elasticsearch.xpack.flattened.FlattenedMapperPlugin; +import org.junit.Before; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import static org.elasticsearch.index.query.QueryBuilders.existsQuery; +import static org.elasticsearch.index.query.QueryBuilders.matchQuery; +import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery; +import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery; +import static org.elasticsearch.index.query.QueryBuilders.simpleQueryStringQuery; +import static 
org.elasticsearch.search.aggregations.AggregationBuilders.cardinality; +import static org.elasticsearch.search.aggregations.AggregationBuilders.terms; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertOrderedSearchHits; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; +import static org.hamcrest.CoreMatchers.startsWith; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.notNullValue; + +public class FlatObjectSearchTests extends ESSingleNodeTestCase { + + protected Collection<Class<? extends Plugin>> getPlugins() { + return pluginList(FlattenedMapperPlugin.class, XPackPlugin.class); + } + + @Before + public void setUpIndex() throws IOException { + XContentBuilder mapping = XContentFactory.jsonBuilder().startObject() + .startObject("_doc") + .startObject("properties") + .startObject("flat_object") + .field("type", "flattened") + .field("split_queries_on_whitespace", true) + .endObject() + .startObject("headers") + .field("type", "flattened") + .field("split_queries_on_whitespace", true) + .endObject() + .startObject("labels") + .field("type", "flattened") + .endObject() + .endObject() + .endObject() + .endObject(); + createIndex("test", Settings.EMPTY, "_doc", mapping); + } + + public void testMatchQuery() throws Exception { + client().prepareIndex("test", "_doc", "1") + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .setSource(XContentFactory.jsonBuilder() + .startObject() + .startObject("headers") + .field("content-type", "application/json") + .field("origin", "https://www.elastic.co") + .endObject() + .endObject()) + .get(); + + SearchResponse searchResponse = client().prepareSearch() + .setQuery(matchQuery("headers", "application/json")) + .get(); + 
assertHitCount(searchResponse, 1L); + + // Check that queries are split on whitespace. + searchResponse = client().prepareSearch() + .setQuery(matchQuery("headers.content-type", "application/json text/plain")) + .get(); + assertHitCount(searchResponse, 1L); + + searchResponse = client().prepareSearch() + .setQuery(matchQuery("headers.origin", "application/json")) + .get(); + assertHitCount(searchResponse, 0L); + } + + public void testMultiMatchQuery() throws Exception { + client().prepareIndex("test", "_doc", "1") + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .setSource(XContentFactory.jsonBuilder() + .startObject() + .startObject("headers") + .field("content-type", "application/json") + .field("origin", "https://www.elastic.co") + .endObject() + .endObject()) + .get(); + + SearchResponse searchResponse = client().prepareSearch() + .setQuery(multiMatchQuery("application/json", "headers")) + .get(); + assertHitCount(searchResponse, 1L); + + searchResponse = client().prepareSearch() + .setQuery(multiMatchQuery("application/json text/plain", "headers.content-type")) + .get(); + assertHitCount(searchResponse, 1L); + + searchResponse = client().prepareSearch() + .setQuery(multiMatchQuery("application/json", "headers.origin")) + .get(); + assertHitCount(searchResponse, 0L); + + searchResponse = client().prepareSearch() + .setQuery(multiMatchQuery("application/json", "headers.origin", "headers.contentType")) + .get(); + assertHitCount(searchResponse, 0L); + } + + public void testQueryStringQuery() throws Exception { + client().prepareIndex("test", "_doc", "1") + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .setSource(XContentFactory.jsonBuilder() + .startObject() + .startObject("flat_object") + .field("field1", "value") + .field("field2", "2.718") + .endObject() + .endObject()) + .get(); + + SearchResponse response = client().prepareSearch("test") + .setQuery(queryStringQuery("flat_object.field1:value")) + .get(); + assertSearchResponse(response); + 
assertHitCount(response, 1); + + response = client().prepareSearch("test") + .setQuery(queryStringQuery("flat_object.field1:value AND flat_object:2.718")) + .get(); + assertSearchResponse(response); + assertHitCount(response, 1); + + response = client().prepareSearch("test") + .setQuery(queryStringQuery("2.718").field("flat_object.field2")) + .get(); + assertSearchResponse(response); + assertHitCount(response, 1); + } + + public void testSimpleQueryStringQuery() throws Exception { + client().prepareIndex("test", "_doc", "1") + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .setSource(XContentFactory.jsonBuilder() + .startObject() + .startObject("flat_object") + .field("field1", "value") + .field("field2", "2.718") + .endObject() + .endObject()) + .get(); + + SearchResponse response = client().prepareSearch("test") + .setQuery(simpleQueryStringQuery("value").field("flat_object.field1")) + .get(); + assertSearchResponse(response); + assertHitCount(response, 1); + + response = client().prepareSearch("test") + .setQuery(simpleQueryStringQuery("+value +2.718").field("flat_object")) + .get(); + assertSearchResponse(response); + assertHitCount(response, 1); + + response = client().prepareSearch("test") + .setQuery(simpleQueryStringQuery("+value +3.141").field("flat_object")) + .get(); + assertSearchResponse(response); + assertHitCount(response, 0); + } + + public void testExists() throws Exception { + client().prepareIndex("test", "_doc", "1") + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .setSource(XContentFactory.jsonBuilder() + .startObject() + .startObject("headers") + .field("content-type", "application/json") + .endObject() + .endObject()) + .get(); + + SearchResponse searchResponse = client().prepareSearch() + .setQuery(existsQuery("headers")) + .get(); + assertHitCount(searchResponse, 1L); + + searchResponse = client().prepareSearch() + .setQuery(existsQuery("headers.content-type")) + .get(); + assertHitCount(searchResponse, 1L); + + searchResponse = 
client().prepareSearch() + .setQuery(existsQuery("headers.nonexistent")) + .get(); + assertHitCount(searchResponse, 0L); + } + + public void testCardinalityAggregation() throws IOException { + int numDocs = randomIntBetween(2, 100); + int precisionThreshold = randomIntBetween(0, 1 << randomInt(20)); + + BulkRequestBuilder bulkRequest = client().prepareBulk("test", "_doc") + .setRefreshPolicy(RefreshPolicy.IMMEDIATE); + + // Add a random number of documents containing a flat object field, plus + // a small number of dummy documents. + for (int i = 0; i < numDocs; ++i) { + bulkRequest.add(client().prepareIndex() + .setSource(XContentFactory.jsonBuilder().startObject() + .startObject("flat_object") + .field("first", i) + .field("second", i / 2) + .endObject() + .endObject())); + } + + for (int i = 0; i < 10; i++) { + bulkRequest.add(client().prepareIndex("test", "_doc") + .setSource("other_field", "1")); + } + + BulkResponse bulkResponse = bulkRequest.get(); + assertNoFailures(bulkResponse); + + // Test the root flat object field. + SearchResponse response = client().prepareSearch("test") + .addAggregation(cardinality("cardinality") + .precisionThreshold(precisionThreshold) + .field("flat_object")) + .get(); + + assertSearchResponse(response); + Cardinality count = response.getAggregations().get("cardinality"); + assertCardinality(count, numDocs, precisionThreshold); + + // Test two keyed flat object fields. 
+ SearchResponse firstResponse = client().prepareSearch("test") + .addAggregation(cardinality("cardinality") + .precisionThreshold(precisionThreshold) + .field("flat_object.first")) + .get(); + assertSearchResponse(firstResponse); + + Cardinality firstCount = firstResponse.getAggregations().get("cardinality"); + assertCardinality(firstCount, numDocs, precisionThreshold); + + SearchResponse secondResponse = client().prepareSearch("test") + .addAggregation(cardinality("cardinality") + .precisionThreshold(precisionThreshold) + .field("flat_object.second")) + .get(); + assertSearchResponse(secondResponse); + + Cardinality secondCount = secondResponse.getAggregations().get("cardinality"); + assertCardinality(secondCount, (numDocs + 1) / 2, precisionThreshold); + } + + private void assertCardinality(Cardinality count, long value, int precisionThreshold) { + if (value <= precisionThreshold) { + // linear counting should be picked, and should be accurate + assertEquals(value, count.getValue()); + } else { + // error is not bound, so let's just make sure it is > 0 + assertThat(count.getValue(), greaterThan(0L)); + } + } + + public void testTermsAggregation() throws IOException { + BulkRequestBuilder bulkRequest = client().prepareBulk("test", "_doc") + .setRefreshPolicy(RefreshPolicy.IMMEDIATE); + for (int i = 0; i < 5; i++) { + bulkRequest.add(client().prepareIndex() + .setSource(XContentFactory.jsonBuilder().startObject() + .startObject("labels") + .field("priority", "urgent") + .field("release", "v1.2." + i) + .endObject() + .endObject())); + } + + BulkResponse bulkResponse = bulkRequest.get(); + assertNoFailures(bulkResponse); + + // Aggregate on the root 'labels' field. 
+ TermsAggregationBuilder builder = createTermsAgg("labels"); + SearchResponse response = client().prepareSearch("test") + .addAggregation(builder) + .get(); + assertSearchResponse(response); + + Terms terms = response.getAggregations().get("terms"); + assertThat(terms, notNullValue()); + assertThat(terms.getName(), equalTo("terms")); + assertThat(terms.getBuckets().size(), equalTo(6)); + + Terms.Bucket bucket1 = terms.getBuckets().get(0); + assertEquals("urgent", bucket1.getKey()); + assertEquals(5, bucket1.getDocCount()); + + Terms.Bucket bucket2 = terms.getBuckets().get(1); + assertThat(bucket2.getKeyAsString(), startsWith("v1.2.")); + assertEquals(1, bucket2.getDocCount()); + + // Aggregate on the 'priority' subfield. + TermsAggregationBuilder priorityAgg = createTermsAgg("labels.priority"); + SearchResponse priorityResponse = client().prepareSearch("test") + .addAggregation(priorityAgg) + .get(); + assertSearchResponse(priorityResponse); + + Terms priorityTerms = priorityResponse.getAggregations().get("terms"); + assertThat(priorityTerms, notNullValue()); + assertThat(priorityTerms.getName(), equalTo("terms")); + assertThat(priorityTerms.getBuckets().size(), equalTo(1)); + + Terms.Bucket priorityBucket = priorityTerms.getBuckets().get(0); + assertEquals("urgent", priorityBucket.getKey()); + assertEquals(5, priorityBucket.getDocCount()); + + // Aggregate on the 'release' subfield. 
+        TermsAggregationBuilder releaseAgg = createTermsAgg("labels.release");
+        SearchResponse releaseResponse = client().prepareSearch("test")
+            .addAggregation(releaseAgg)
+            .get();
+        assertSearchResponse(releaseResponse);
+
+        Terms releaseTerms = releaseResponse.getAggregations().get("terms");
+        assertThat(releaseTerms, notNullValue());
+        assertThat(releaseTerms.getName(), equalTo("terms"));
+        assertThat(releaseTerms.getBuckets().size(), equalTo(5));
+
+        for (Terms.Bucket bucket : releaseTerms.getBuckets()) {
+            assertThat(bucket.getKeyAsString(), startsWith("v1.2."));
+            assertEquals(1, bucket.getDocCount());
+        }
+
+        // Aggregate on the 'priority' subfield with a min_doc_count of 0.
+        TermsAggregationBuilder minDocCountAgg = createTermsAgg("labels.priority")
+            .minDocCount(0);
+        SearchResponse minDocCountResponse = client().prepareSearch("test")
+            .addAggregation(minDocCountAgg)
+            .get();
+        assertSearchResponse(minDocCountResponse);
+
+        Terms minDocCountTerms = minDocCountResponse.getAggregations().get("terms");
+        assertThat(minDocCountTerms, notNullValue());
+        assertThat(minDocCountTerms.getName(), equalTo("terms"));
+        assertThat(minDocCountTerms.getBuckets().size(), equalTo(1));
+    }
+
+    /**
+     * Builds a terms aggregation named "terms" on the given field, with a randomly
+     * chosen execution mode and sub-aggregation collection mode so that every
+     * combination gets exercised across test runs.
+     */
+    private TermsAggregationBuilder createTermsAgg(String field) {
+        TermsAggregatorFactory.ExecutionMode executionMode = randomFrom(
+            TermsAggregatorFactory.ExecutionMode.values());
+        Aggregator.SubAggCollectionMode collectionMode = randomFrom(
+            Aggregator.SubAggCollectionMode.values());
+
+        return terms("terms")
+            .field(field)
+            .collectMode(collectionMode)
+            .executionHint(executionMode.toString());
+    }
+
+
+    /**
+     * Indexes one document and checks that doc values can be fetched both for the
+     * root field (all values) and for a single key (only that key's value).
+     */
+    public void testLoadDocValuesFields() throws Exception {
+        client().prepareIndex("test", "_doc", "1")
+            .setRefreshPolicy(RefreshPolicy.IMMEDIATE)
+            .setSource(XContentFactory.jsonBuilder()
+                .startObject()
+                    .startObject("flat_object")
+                        .field("key", "value")
+                        .field("other_key", "other_value")
+                    .endObject()
+                .endObject())
+            .get();
+
+        SearchResponse response = client().prepareSearch("test")
+            .addDocValueField("flat_object")
+            .addDocValueField("flat_object.key")
+            .get();
+        assertSearchResponse(response);
+        assertHitCount(response, 1);
+
+        // The map must be parameterized: with a raw Map, get() yields Object and the
+        // assignments to DocumentField below do not compile.
+        Map<String, DocumentField> fields = response.getHits().getAt(0).getFields();
+
+        DocumentField field = fields.get("flat_object");
+        assertEquals("flat_object", field.getName());
+        assertEquals(Arrays.asList("other_value", "value"), field.getValues());
+
+        DocumentField keyedField = fields.get("flat_object.key");
+        assertEquals("flat_object.key", keyedField.getName());
+        assertEquals("value", keyedField.getValue());
+    }
+
+    /**
+     * Indexes three documents and checks descending sorts on the root field, on a
+     * single key, and on a single key with an explicit 'missing' value.
+     */
+    public void testFieldSort() throws Exception {
+        client().prepareIndex("test", "_doc", "1")
+            .setRefreshPolicy(RefreshPolicy.IMMEDIATE)
+            .setSource(XContentFactory.jsonBuilder()
+                .startObject()
+                    .startObject("flat_object")
+                        .field("key", "A")
+                        .field("other_key", "D")
+                    .endObject()
+                .endObject())
+            .get();
+
+        client().prepareIndex("test", "_doc", "2")
+            .setRefreshPolicy(RefreshPolicy.IMMEDIATE)
+            .setSource(XContentFactory.jsonBuilder()
+                .startObject()
+                    .startObject("flat_object")
+                        .field("key", "B")
+                        .field("other_key", "C")
+                    .endObject()
+                .endObject())
+            .get();
+
+        // Document 3 deliberately has no value for 'key'.
+        client().prepareIndex("test", "_doc", "3")
+            .setRefreshPolicy(RefreshPolicy.IMMEDIATE)
+            .setSource(XContentFactory.jsonBuilder()
+                .startObject()
+                    .startObject("flat_object")
+                        .field("other_key", "E")
+                    .endObject()
+                .endObject())
+            .get();
+
+        // Root field: the expected order follows each document's largest value (E, D, C).
+        SearchResponse response = client().prepareSearch("test")
+            .addSort("flat_object", SortOrder.DESC)
+            .get();
+        assertSearchResponse(response);
+        assertHitCount(response, 3);
+        assertOrderedSearchHits(response, "3", "1", "2");
+
+        // Keyed field: B, then A; the document without 'key' is expected last.
+        response = client().prepareSearch("test")
+            .addSort("flat_object.key", SortOrder.DESC)
+            .get();
+        assertSearchResponse(response);
+        assertHitCount(response, 3);
+        assertOrderedSearchHits(response, "2", "1", "3");
+
+        // With missing = "Z", the document without 'key' is expected first.
+        response = client().prepareSearch("test")
+            .addSort(new FieldSortBuilder("flat_object.key").order(SortOrder.DESC).missing("Z"))
+            .get();
+        assertSearchResponse(response);
+        assertHitCount(response, 3);
+        assertOrderedSearchHits(response, "3", "2", "1");
+    }
+
+    /**
+     * Checks that _source include/exclude filtering can address both the whole
+     * 'headers' object and an individual key inside it.
+     */
+    public void testSourceFiltering() {
+        Map<String, Object> headers = new HashMap<>();
+        headers.put("content-type", "application/json");
+        headers.put("origin", "https://www.elastic.co");
+        Map<String, Object> source = Collections.singletonMap("headers", headers);
+
+        client().prepareIndex("test", "_doc", "1")
+            .setRefreshPolicy(RefreshPolicy.IMMEDIATE)
+            .setSource(source)
+            .get();
+
+        SearchResponse response = client().prepareSearch("test").setFetchSource(true).get();
+        assertThat(response.getHits().getAt(0).getSourceAsMap(), equalTo(source));
+
+        // Check 'include' filtering.
+        response = client().prepareSearch("test").setFetchSource("headers", null).get();
+        assertThat(response.getHits().getAt(0).getSourceAsMap(), equalTo(source));
+
+        response = client().prepareSearch("test").setFetchSource("headers.content-type", null).get();
+        Map<String, Object> filteredSource = Collections.singletonMap("headers",
+            Collections.singletonMap("content-type", "application/json"));
+        assertThat(response.getHits().getAt(0).getSourceAsMap(), equalTo(filteredSource));
+
+        // Check 'exclude' filtering.
+        response = client().prepareSearch("test").setFetchSource(null, "headers.content-type").get();
+        filteredSource = Collections.singletonMap("headers",
+            Collections.singletonMap("origin", "https://www.elastic.co"));
+        assertThat(response.getHits().getAt(0).getSourceAsMap(), equalTo(filteredSource));
+    }
+}
diff --git a/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/KeyedFlatObjectAtomicFieldDataTests.java b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/KeyedFlatObjectAtomicFieldDataTests.java
new file mode 100644
index 00000000000..bb6c195395d
--- /dev/null
+++ b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/KeyedFlatObjectAtomicFieldDataTests.java
@@ -0,0 +1,204 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.flattened.mapper;
+
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues;
+import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData;
+import org.elasticsearch.index.fielddata.plain.AbstractAtomicOrdinalsFieldData;
+import org.elasticsearch.test.ESTestCase;
+import org.junit.Before;
+
+import java.io.IOException;
+
+import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
+
+/**
+ * Unit tests for {@link KeyedFlatObjectAtomicFieldData}, run against an in-memory
+ * mock of the underlying ordinals field data.
+ */
+public class KeyedFlatObjectAtomicFieldDataTests extends ESTestCase {
+    private AtomicOrdinalsFieldData delegate;
+
+    /**
+     * Builds the shared mock delegate: 60 terms laid out by key as
+     * apple [0, 19], avocado [20, 29], banana [30, 39], cantaloupe [40, 40] and
+     * cucumber [41, 59]. The single mock document holds every ordinal except
+     * the ten 'avocado' ones.
+     */
+    @Before
+    public void setUpDelegate() {
+        BytesRef[] allTerms = new BytesRef[60];
+        long[] documentOrds = new long[50];
+        int index = 0;
+
+        for (int ord = 0; ord < allTerms.length; ord++) {
+            String key;
+            if (ord < 20) {
+                key = "apple";
+            } else if (ord < 30) {
+                key = "avocado";
+            } else if (ord < 40) {
+                key = "banana";
+            } else if (ord < 41) {
+                key = "cantaloupe";
+            } else {
+                key = "cucumber";
+            }
+
+            allTerms[ord] = prefixedValue(key, "value" + ord);
+
+            // Do not include the term 'avocado' in the mock document.
+            if (key.equals("avocado") == false) {
+                documentOrds[index++] = ord;
+            }
+        }
+
+        delegate = new MockAtomicOrdinalsFieldData(allTerms, documentOrds);
+    }
+
+    /** Encodes a key/value pair as a term using {@link FlatObjectFieldParser#createKeyedValue}. */
+    private BytesRef prefixedValue(String key, String value) {
+        String term = FlatObjectFieldParser.createKeyedValue(key, value);
+        return new BytesRef(term);
+    }
+
+    /**
+     * Checks the min/max ordinal lookup for present keys, an absent key, an empty
+     * term dictionary, a single-term dictionary, and keys that are prefixes of
+     * one another.
+     */
+    public void testFindOrdinalBounds() throws IOException {
+        testFindOrdinalBounds("apple", delegate, 0, 19);
+        testFindOrdinalBounds("avocado", delegate, 20, 29);
+        testFindOrdinalBounds("banana", delegate, 30, 39);
+        testFindOrdinalBounds("berry", delegate, -1, -1);
+        testFindOrdinalBounds("cantaloupe", delegate, 40, 40);
+        testFindOrdinalBounds("cucumber", delegate, 41, 59);
+
+        AtomicOrdinalsFieldData emptyDelegate = new MockAtomicOrdinalsFieldData(new BytesRef[0], new long[0]);
+        testFindOrdinalBounds("apple", emptyDelegate, -1, -1);
+
+        BytesRef[] terms = new BytesRef[] { prefixedValue("prefix", "value") };
+        AtomicOrdinalsFieldData singleValueDelegate = new MockAtomicOrdinalsFieldData(terms, new long[0]);
+        testFindOrdinalBounds("prefix", singleValueDelegate, 0, 0);
+        testFindOrdinalBounds("prefix1", singleValueDelegate, -1, -1);
+
+        terms = new BytesRef[] { prefixedValue("prefix", "value"),
+            prefixedValue("prefix1", "value"),
+            prefixedValue("prefix1", "value1"),
+            prefixedValue("prefix2", "value"),
+            prefixedValue("prefix3", "value")};
+        AtomicOrdinalsFieldData oddLengthDelegate = new MockAtomicOrdinalsFieldData(terms, new long[0]);
+        testFindOrdinalBounds("prefix", oddLengthDelegate, 0, 0);
+        testFindOrdinalBounds("prefix1", oddLengthDelegate, 1, 2);
+        testFindOrdinalBounds("prefix2", oddLengthDelegate, 3, 3);
+        testFindOrdinalBounds("prefix3", oddLengthDelegate, 4, 4);
+    }
+
+    /**
+     * Asserts the ordinal bounds reported for {@code key}.
+     *
+     * @param key            the flattened-field key to look up
+     * @param delegate       field data whose term dictionary is searched
+     * @param expectedMinOrd expected result of {@code findMinOrd} (-1 when the key is absent)
+     * @param expectedMaxOrd expected result of {@code findMaxOrd} (-1 when the key is absent)
+     */
+    public void testFindOrdinalBounds(String key,
+                                      AtomicOrdinalsFieldData delegate,
+                                      long expectedMinOrd,
+                                      long expectedMaxOrd) throws IOException {
+        BytesRef bytesKey = new BytesRef(key);
+
+        long actualMinOrd = KeyedFlatObjectAtomicFieldData.findMinOrd(bytesKey, delegate.getOrdinalsValues());
+        assertEquals(expectedMinOrd, actualMinOrd);
+
+        long actualMaxOrd = KeyedFlatObjectAtomicFieldData.findMaxOrd(bytesKey, delegate.getOrdinalsValues());
+        assertEquals(expectedMaxOrd, actualMaxOrd);
+    }
+
+    /**
+     * advanceExact must report false for a key the document lacks ('avocado') and
+     * for a key with no terms at all ('berry'), and true for a key it has ('banana').
+     */
+    public void testAdvanceExact() throws IOException {
+        AtomicOrdinalsFieldData avocadoFieldData = new KeyedFlatObjectAtomicFieldData("avocado", delegate);
+        assertFalse(avocadoFieldData.getOrdinalsValues().advanceExact(0));
+
+        AtomicOrdinalsFieldData bananaFieldData = new KeyedFlatObjectAtomicFieldData("banana", delegate);
+        assertTrue(bananaFieldData.getOrdinalsValues().advanceExact(0));
+
+        AtomicOrdinalsFieldData nonexistentFieldData = new KeyedFlatObjectAtomicFieldData("berry", delegate);
+        assertFalse(nonexistentFieldData.getOrdinalsValues().advanceExact(0));
+    }
+
+    /**
+     * Iterates the 'banana' ordinals of the mock document: exactly ten are expected,
+     * renumbered into [0, 10), each resolving to the un-prefixed value of the
+     * delegate term at (ord + 30).
+     */
+    public void testNextOrd() throws IOException {
+        AtomicOrdinalsFieldData fieldData = new KeyedFlatObjectAtomicFieldData("banana", delegate);
+        SortedSetDocValues docValues = fieldData.getOrdinalsValues();
+        // nextOrd() is only meaningful after a successful advance, so check the result.
+        assertTrue(docValues.advanceExact(0));
+
+        int retrievedOrds = 0;
+        for (long ord = docValues.nextOrd(); ord != NO_MORE_ORDS; ord = docValues.nextOrd()) {
+            assertTrue(0 <= ord && ord < 10);
+            retrievedOrds++;
+
+            BytesRef expectedValue = new BytesRef("value" + (ord + 30));
+            BytesRef actualValue = docValues.lookupOrd(ord);
+            assertEquals(expectedValue, actualValue);
+        }
+
+        assertEquals(10, retrievedOrds);
+    }
+
+    /** Ordinal 0 of each keyed view must resolve to the first value stored under that key. */
+    public void testLookupOrd() throws IOException {
+        AtomicOrdinalsFieldData appleFieldData = new KeyedFlatObjectAtomicFieldData("apple", delegate);
+        SortedSetDocValues appleDocValues = appleFieldData.getOrdinalsValues();
+        assertEquals(new BytesRef("value0"), appleDocValues.lookupOrd(0));
+
+        AtomicOrdinalsFieldData cantaloupeFieldData = new KeyedFlatObjectAtomicFieldData("cantaloupe", delegate);
+        SortedSetDocValues cantaloupeDocValues = cantaloupeFieldData.getOrdinalsValues();
+        assertEquals(new BytesRef("value40"), cantaloupeDocValues.lookupOrd(0));
+
+        AtomicOrdinalsFieldData cucumberFieldData = new KeyedFlatObjectAtomicFieldData("cucumber", delegate);
+        SortedSetDocValues cucumberDocValues = cucumberFieldData.getOrdinalsValues();
+        assertEquals(new BytesRef("value41"), cucumberDocValues.lookupOrd(0));
+    }
+
+    /** Field data stub that always hands back one fixed {@link MockSortedSetDocValues}. */
+    private static class MockAtomicOrdinalsFieldData extends AbstractAtomicOrdinalsFieldData {
+        private final SortedSetDocValues docValues;
+
+        MockAtomicOrdinalsFieldData(BytesRef[] allTerms,
+                                    long[] documentOrds) {
+            super(AbstractAtomicOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION);
+            this.docValues = new MockSortedSetDocValues(allTerms, documentOrds);
+        }
+
+        @Override
+        public SortedSetDocValues getOrdinalsValues() {
+            return docValues;
+        }
+
+        @Override
+        public long ramBytesUsed() {
+            return 0;
+        }
+
+        @Override
+        public void close() {
+            // Nothing to do.
+        }
+    }
+
+    /**
+     * Doc values stub: {@code allTerms} is the term dictionary, and every document
+     * id yields the same {@code documentOrds} sequence.
+     */
+    private static class MockSortedSetDocValues extends AbstractSortedSetDocValues {
+        private final BytesRef[] allTerms;
+        private final long[] documentOrds;
+        private int index;
+
+        MockSortedSetDocValues(BytesRef[] allTerms,
+                               long[] documentOrds) {
+            this.allTerms = allTerms;
+            this.documentOrds = documentOrds;
+        }
+
+        @Override
+        public boolean advanceExact(int docID) {
+            // The doc id is ignored: iteration simply restarts from the first ordinal.
+            index = 0;
+            return true;
+        }
+
+        @Override
+        public long nextOrd() {
+            if (index == documentOrds.length) {
+                return NO_MORE_ORDS;
+            }
+            return documentOrds[index++];
+        }
+
+        @Override
+        public BytesRef lookupOrd(long ord) {
+            return allTerms[(int) ord];
+        }
+
+        @Override
+        public long getValueCount() {
+            return allTerms.length;
+        }
+    }
+}
diff --git a/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/KeyedFlatObjectFieldTypeTests.java b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/KeyedFlatObjectFieldTypeTests.java
new file mode 100644
index 00000000000..46901035c8a
--- /dev/null
+++ 
b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/KeyedFlatObjectFieldTypeTests.java
@@ -0,0 +1,155 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.flattened.mapper;
+
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermInSetQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.unit.Fuzziness;
+import org.elasticsearch.index.mapper.FieldTypeTestCase;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.xpack.flattened.mapper.FlatObjectFieldMapper.KeyedFlatObjectFieldType;
+import org.junit.Before;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Tests the queries built by {@link KeyedFlatObjectFieldType}. Every test uses a
+ * field type created for the key "key" and named "field"; the expected terms
+ * are the key and the value joined by a NUL character ("key\0value").
+ */
+public class KeyedFlatObjectFieldTypeTests extends FieldTypeTestCase {
+
+    /** Registers a modifier that flips the split_queries_on_whitespace property. */
+    @Before
+    public void setupProperties() {
+        addModifier(new Modifier("split_queries_on_whitespace", true) {
+            @Override
+            public void modify(MappedFieldType type) {
+                KeyedFlatObjectFieldType ft = (KeyedFlatObjectFieldType) type;
+                ft.setSplitQueriesOnWhitespace(!ft.splitQueriesOnWhitespace());
+            }
+        });
+    }
+
+    @Override
+    protected KeyedFlatObjectFieldType createDefaultFieldType() {
+        return new KeyedFlatObjectFieldType("key");
+    }
+
+    /** String, double and boolean inputs must all be rendered as text and prefixed with the key. */
+    public void testIndexedValueForSearch() {
+        KeyedFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        BytesRef keywordValue = ft.indexedValueForSearch("value");
+        assertEquals(new BytesRef("key\0value"), keywordValue);
+
+        BytesRef doubleValue = ft.indexedValueForSearch(2.718);
+        assertEquals(new BytesRef("key\0" + "2.718"), doubleValue);
+
+        BytesRef booleanValue = ft.indexedValueForSearch(true);
+        assertEquals(new BytesRef("key\0true"), booleanValue);
+    }
+
+    /** A term query targets the prefixed term; an un-indexed field must be rejected. */
+    public void testTermQuery() {
+        KeyedFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        Query expected = new TermQuery(new Term("field", "key\0value"));
+        assertEquals(expected, ft.termQuery("value", null));
+
+        ft.setIndexOptions(IndexOptions.NONE);
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+            () -> ft.termQuery("field", null));
+        assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
+    }
+
+    /** A terms query must prefix every supplied value with the key. */
+    public void testTermsQuery() {
+        KeyedFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        Query expected = new TermInSetQuery("field",
+            new BytesRef("key\0value1"),
+            new BytesRef("key\0value2"));
+
+        // Parameterized rather than raw, so the element type is checked at compile time.
+        List<String> terms = new ArrayList<>();
+        terms.add("value1");
+        terms.add("value2");
+        Query actual = ft.termsQuery(terms, null);
+
+        assertEquals(expected, actual);
+    }
+
+    /** Existence of a key is expressed as a prefix query on "key\0". */
+    public void testExistsQuery() {
+        KeyedFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        Query expected = new PrefixQuery(new Term("field", "key\0"));
+        assertEquals(expected, ft.existsQuery(null));
+    }
+
+    /** A prefix query must prepend the key to the requested prefix. */
+    public void testPrefixQuery() {
+        KeyedFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        Query expected = new PrefixQuery(new Term("field", "key\0val"));
+        assertEquals(expected, ft.prefixQuery("val", MultiTermQuery.CONSTANT_SCORE_REWRITE, null));
+    }
+
+    /** Fuzzy queries are rejected on keyed fields. */
+    public void testFuzzyQuery() {
+        KeyedFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class,
+            () -> ft.fuzzyQuery("valuee", Fuzziness.fromEdits(2), 1, 50, true));
+        assertEquals("[fuzzy] queries are not currently supported on keyed [flattened] fields.", e.getMessage());
+    }
+
+    /** Range queries prefix both bounds with the key and require both bounds to be present. */
+    public void testRangeQuery() {
+        KeyedFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        TermRangeQuery expected = new TermRangeQuery("field",
+            new BytesRef("key\0lower"),
+            new BytesRef("key\0upper"), false, false);
+        assertEquals(expected, ft.rangeQuery("lower", "upper", false, false, null));
+
+        expected = new TermRangeQuery("field",
+            new BytesRef("key\0lower"),
+            new BytesRef("key\0upper"), true, true);
+        assertEquals(expected, ft.rangeQuery("lower", "upper", true, true, null));
+
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () ->
+            ft.rangeQuery("lower", null, false, false, null));
+        assertEquals("[range] queries on keyed [flattened] fields must include both an upper and a lower bound.",
+            e.getMessage());
+
+        e = expectThrows(IllegalArgumentException.class, () ->
+            ft.rangeQuery(null, "upper", false, false, null));
+        assertEquals("[range] queries on keyed [flattened] fields must include both an upper and a lower bound.",
+            e.getMessage());
+    }
+
+    /** Regexp queries are rejected on keyed fields. */
+    public void testRegexpQuery() {
+        KeyedFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class,
+            () -> ft.regexpQuery("valu*", 0, 10, null, null));
+        assertEquals("[regexp] queries are not currently supported on keyed [flattened] fields.", e.getMessage());
+    }
+
+    /** Wildcard queries are rejected on keyed fields. */
+    public void testWildcardQuery() {
+        KeyedFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class,
+            () -> ft.wildcardQuery("valu*", null, null));
+        assertEquals("[wildcard] queries are not currently supported on keyed [flattened] fields.", e.getMessage());
+    }
+}
diff --git a/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/RootFlatObjectFieldTypeTests.java 
b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/RootFlatObjectFieldTypeTests.java
new file mode 100644
index 00000000000..be297663c6e
--- /dev/null
+++ b/x-pack/plugin/mapper-flattened/src/test/java/org/elasticsearch/xpack/flattened/mapper/RootFlatObjectFieldTypeTests.java
@@ -0,0 +1,116 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.flattened.mapper;
+
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.unit.Fuzziness;
+import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
+import org.elasticsearch.index.mapper.FieldTypeTestCase;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.xpack.flattened.mapper.FlatObjectFieldMapper.RootFlatObjectFieldType;
+import org.junit.Before;
+
+/**
+ * Tests the queries built by {@link RootFlatObjectFieldType}. Unlike the keyed
+ * field type, the expected terms here are the plain values with no key prefix.
+ */
+public class RootFlatObjectFieldTypeTests extends FieldTypeTestCase {
+
+    /** Registers a modifier that flips the split_queries_on_whitespace property. */
+    @Before
+    public void setupProperties() {
+        addModifier(new Modifier("split_queries_on_whitespace", true) {
+            @Override
+            public void modify(MappedFieldType type) {
+                RootFlatObjectFieldType ft = (RootFlatObjectFieldType) type;
+                ft.setSplitQueriesOnWhitespace(!ft.splitQueriesOnWhitespace());
+            }
+        });
+    }
+
+    @Override
+    protected RootFlatObjectFieldType createDefaultFieldType() {
+        return new RootFlatObjectFieldType();
+    }
+
+    /** A stored BytesRef must be displayed as the string it encodes. */
+    public void testValueForDisplay() {
+        RootFlatObjectFieldType ft = createDefaultFieldType();
+
+        String fieldValue = "{ \"key\": \"value\" }";
+        BytesRef storedValue = new BytesRef(fieldValue);
+        assertEquals(fieldValue, ft.valueForDisplay(storedValue));
+    }
+
+    /** A term query targets the raw value; an un-indexed field must be rejected. */
+    public void testTermQuery() {
+        RootFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        Query expected = new TermQuery(new Term("field", "value"));
+        assertEquals(expected, ft.termQuery("value", null));
+
+        ft.setIndexOptions(IndexOptions.NONE);
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+            () -> ft.termQuery("field", null));
+        assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
+    }
+
+    /**
+     * Without doc values the exists query is a term query on the field-names
+     * field; once doc values are enabled it becomes a doc-values exists query.
+     */
+    public void testExistsQuery() {
+        // Use the factory like every other test in this class, rather than calling
+        // the constructor directly.
+        RootFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+        assertEquals(
+            new TermQuery(new Term(FieldNamesFieldMapper.NAME, new BytesRef("field"))),
+            ft.existsQuery(null));
+
+        ft.setHasDocValues(true);
+        assertEquals(new DocValuesFieldExistsQuery("field"), ft.existsQuery(null));
+    }
+
+    /** Fuzzy queries are supported on the root field and passed through unchanged. */
+    public void testFuzzyQuery() {
+        RootFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        Query expected = new FuzzyQuery(new Term("field", "value"), 2, 1, 50, true);
+        Query actual = ft.fuzzyQuery("value", Fuzziness.fromEdits(2), 1, 50, true);
+        assertEquals(expected, actual);
+    }
+
+    /** Range queries use the raw bounds, with inclusiveness passed through. */
+    public void testRangeQuery() {
+        RootFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        TermRangeQuery expected = new TermRangeQuery("field",
+            new BytesRef("lower"),
+            new BytesRef("upper"), false, false);
+        assertEquals(expected, ft.rangeQuery("lower", "upper", false, false, null));
+
+        expected = new TermRangeQuery("field",
+            new BytesRef("lower"),
+            new BytesRef("upper"), true, true);
+        assertEquals(expected, ft.rangeQuery("lower", "upper", true, true, null));
+    }
+
+    /** Regexp queries are supported on the root field. */
+    public void testRegexpQuery() {
+        RootFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        Query expected = new RegexpQuery(new Term("field", "val.*"));
+        Query actual = ft.regexpQuery("val.*", 0, 10, null, null);
+        assertEquals(expected, actual);
+    }
+
+    /** Wildcard queries are supported on the root field. */
+    public void testWildcardQuery() {
+        RootFlatObjectFieldType ft = createDefaultFieldType();
+        ft.setName("field");
+
+        Query expected = new WildcardQuery(new Term("field", new BytesRef("valu*")));
+        assertEquals(expected, ft.wildcardQuery("valu*", null, null));
+    }
+}
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/flattened/10_basic.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/flattened/10_basic.yml
new file mode 100644
index 00000000000..c750abb7895
--- /dev/null
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/flattened/10_basic.yml
@@ -0,0 +1,111 @@
+---
+"Test exists query on flattened object field":
+  - skip:
+      version: " - 7.99.99"
+      reason: "Flat object fields are currently only implemented in 8.0."
+
+  - do:
+      indices.create:
+        index: flat_object_test
+        body:
+          mappings:
+            properties:
+              flat_object:
+                type: flattened
+  - do:
+      index:
+        index: flat_object_test
+        id: 1
+        body:
+          flat_object:
+            key: some_value
+        refresh: true
+
+  - do:
+      search:
+        index: flat_object_test
+        body:
+          query:
+            exists:
+              field: flat_object
+
+  - match: { hits.total.value: 1 }
+
+  - do:
+      search:
+        index: flat_object_test
+        body:
+          query:
+            exists:
+              field: flat_object.key
+
+  - match: { hits.total.value: 1 }
+
+  - do:
+      search:
+        index: flat_object_test
+        body:
+          query:
+            exists:
+              field: flat_object.nonexistent_key
+
+  - match: { hits.total.value: 0 }
+
+---
+"Test query string query on flattened object field":
+  - skip:
+      version: " - 7.99.99"
+      reason: "Flat object fields are currently only implemented in 8.0."
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            properties:
+              headers:
+                type: flattened   # the whole 'headers' object is mapped as one flattened field
+
+  - do:
+      index:
+        index: test
+        id: 1   # document 1: content-type application/javascript
+        body:
+          headers:
+            content-type: application/javascript
+            origin: elastic.co
+        refresh: true
+
+  - do:
+      index:
+        index: test
+        id: 2   # document 2: content-type text/plain, same origin as document 1
+        body:
+          headers:
+            content-type: text/plain
+            origin: elastic.co
+        refresh: true
+
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            query_string:
+              query: "headers:text\\/plain"   # targets the root field; the slash is escaped for query_string
+
+  - match: { hits.total.value: 1 }
+  - length: { hits.hits: 1 }
+  - match: { hits.hits.0._id: "2" }   # only document 2 holds the value text/plain
+
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            query_string:
+              query: "application\\/javascript AND headers.origin:elastic.co"   # un-fielded term combined with a keyed sub-field clause
+
+  - match: { hits.total.value: 1 }
+  - length: { hits.hits: 1 }
+  - match: { hits.hits.0._id: "1" }   # both documents share the origin; only document 1 has application/javascript