From f4aee1e583b3a6ad17e879cb3503689726f37679 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 2 Jun 2017 09:21:15 +0200 Subject: [PATCH] Disallow multiple parent-join fields per mapping (#25002) This change ensures that there is a single parent-join field defined per mapping. The verification is done through the addition of a special field mapper (MetaJoinFieldMapper) with a unique name (_parent_join) that is registered to the mapping service when the first parent-join field is defined. If a new parent-join is added, this field mapper will clash with the new one and the update will fail. This change also simplifies the parent join fetch sub phase by retrieving the parent-join field without iterating on all fields in the mapping. --- .../fetch/ParentJoinFieldSubFetchPhase.java | 47 +++---- .../join/mapper/MetaJoinFieldMapper.java | 132 ++++++++++++++++++ .../join/mapper/ParentJoinFieldMapper.java | 53 ++++--- .../mapper/ParentJoinFieldMapperTests.java | 100 +++++++------ 4 files changed, 242 insertions(+), 90 deletions(-) create mode 100644 modules/parent-join/src/main/java/org/elasticsearch/join/mapper/MetaJoinFieldMapper.java diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/fetch/ParentJoinFieldSubFetchPhase.java b/modules/parent-join/src/main/java/org/elasticsearch/join/fetch/ParentJoinFieldSubFetchPhase.java index 9e9215c834f..2eb0ddc3fe1 100644 --- a/modules/parent-join/src/main/java/org/elasticsearch/join/fetch/ParentJoinFieldSubFetchPhase.java +++ b/modules/parent-join/src/main/java/org/elasticsearch/join/fetch/ParentJoinFieldSubFetchPhase.java @@ -23,10 +23,8 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ExceptionsHelper; -import org.elasticsearch.Version; -import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.index.mapper.DocumentMapper; -import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.join.mapper.ParentIdFieldMapper; import org.elasticsearch.join.mapper.ParentJoinFieldMapper; import org.elasticsearch.search.SearchHitField; import org.elasticsearch.search.fetch.FetchSubPhase; @@ -47,42 +45,31 @@ public final class ParentJoinFieldSubFetchPhase implements FetchSubPhase { if (context.storedFieldsContext() != null && context.storedFieldsContext().fetchFields() == false) { return; } - if (context.mapperService().getIndexSettings().getIndexVersionCreated().before(Version.V_6_0_0_alpha2)) { - return; - } - DocumentMapper docMapper = context.mapperService().documentMapper(hitContext.hit().getType()); - Tuple joinField = null; - Tuple parentField = null; - for (FieldMapper fieldMapper : docMapper.mappers()) { - if (fieldMapper instanceof ParentJoinFieldMapper) { - String joinName = getSortedDocValue(fieldMapper.name(), hitContext.reader(), hitContext.docId()); - if (joinName != null) { - ParentJoinFieldMapper joinFieldMapper = (ParentJoinFieldMapper) fieldMapper; - joinField = new Tuple<>(fieldMapper.name(), joinName); - // we retrieve the parent id only for children. - FieldMapper parentMapper = joinFieldMapper.getParentIdFieldMapper(joinName, false); - if (parentMapper != null) { - String parent = getSortedDocValue(parentMapper.name(), hitContext.reader(), hitContext.docId()); - parentField = new Tuple<>(parentMapper.name(), parent); - } - break; - } - } - } - - if (joinField == null) { + ParentJoinFieldMapper mapper = ParentJoinFieldMapper.getMapper(context.mapperService()); + if (mapper == null) { // hit has no join field. return; } + String joinName = getSortedDocValue(mapper.name(), hitContext.reader(), hitContext.docId()); + if (joinName == null) { + return; + } + + // if the hit is a children we extract the parentId (if it's a parent we can use the _id field directly) + ParentIdFieldMapper parentMapper = mapper.getParentIdFieldMapper(joinName, false); + String parentId = null; + if (parentMapper != null) { + parentId = getSortedDocValue(parentMapper.name(), hitContext.reader(), hitContext.docId()); + } Map fields = hitContext.hit().fieldsOrNull(); if (fields == null) { fields = new HashMap<>(); hitContext.hit().fields(fields); } - fields.put(joinField.v1(), new SearchHitField(joinField.v1(), Collections.singletonList(joinField.v2()))); - if (parentField != null) { - fields.put(parentField.v1(), new SearchHitField(parentField.v1(), Collections.singletonList(parentField.v2()))); + fields.put(mapper.name(), new SearchHitField(mapper.name(), Collections.singletonList(joinName))); + if (parentId != null) { + fields.put(parentMapper.name(), new SearchHitField(parentMapper.name(), Collections.singletonList(parentId))); } } diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/mapper/MetaJoinFieldMapper.java b/modules/parent-join/src/main/java/org/elasticsearch/join/mapper/MetaJoinFieldMapper.java new file mode 100644 index 00000000000..43608e9b546 --- /dev/null +++ b/modules/parent-join/src/main/java/org/elasticsearch/join/mapper/MetaJoinFieldMapper.java @@ -0,0 +1,132 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.join.mapper; + +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.mapper.StringFieldType; + +import java.io.IOException; +import java.util.List; + +/** + * Simple field mapper hack to ensure that there is a one and only {@link ParentJoinFieldMapper} per mapping. + * This field mapper is not used to index or query any data, it is used as a marker in the mapping that + * denotes the presence of a parent-join field and forbids the addition of any additional ones. + * This class is also used to quickly retrieve the parent-join field defined in a mapping without + * specifying the name of the field. + */ +class MetaJoinFieldMapper extends FieldMapper { + static final String NAME = "_parent_join"; + static final String CONTENT_TYPE = "parent_join"; + + static class Defaults { + public static final MappedFieldType FIELD_TYPE = new MetaJoinFieldType(); + + static { + FIELD_TYPE.setStored(false); + FIELD_TYPE.setHasDocValues(false); + FIELD_TYPE.setIndexOptions(IndexOptions.NONE); + FIELD_TYPE.freeze(); + } + } + + static class Builder extends FieldMapper.Builder { + Builder() { + super(NAME, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE); + builder = this; + } + + @Override + public MetaJoinFieldMapper build(BuilderContext context) { + fieldType.setName(NAME); + return new MetaJoinFieldMapper(name, fieldType, context.indexSettings()); + } + } + + static final class MetaJoinFieldType extends StringFieldType { + ParentJoinFieldMapper mapper; + + MetaJoinFieldType() {} + + protected MetaJoinFieldType(MetaJoinFieldType ref) { + super(ref); + } + + public MetaJoinFieldType clone() { + return new MetaJoinFieldType(this); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + public IndexFieldData.Builder fielddataBuilder() { + failIfNoDocValues(); + return new DocValuesIndexFieldData.Builder(); + } + + @Override + public Object valueForDisplay(Object value) { + if (value == null) { + return null; + } + BytesRef binaryValue = (BytesRef) value; + return binaryValue.utf8ToString(); + } + } + + MetaJoinFieldMapper(String name, MappedFieldType fieldType, Settings indexSettings) { + super(name, fieldType, ParentIdFieldMapper.Defaults.FIELD_TYPE, indexSettings, MultiFields.empty(), null); + } + + void setFieldMapper(ParentJoinFieldMapper mapper) { + fieldType().mapper = mapper; + } + + @Override + public MetaJoinFieldType fieldType() { + return (MetaJoinFieldType) super.fieldType(); + } + + @Override + protected MetaJoinFieldMapper clone() { + return (MetaJoinFieldMapper) super.clone(); + } + + @Override + protected void parseCreateField(ParseContext context, List fields) throws IOException { + throw new IllegalStateException("Should never be called"); + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } +} diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/mapper/ParentJoinFieldMapper.java b/modules/parent-join/src/main/java/org/elasticsearch/join/mapper/ParentJoinFieldMapper.java index 3f258516a12..e103f803c18 100644 --- a/modules/parent-join/src/main/java/org/elasticsearch/join/mapper/ParentJoinFieldMapper.java +++ b/modules/parent-join/src/main/java/org/elasticsearch/join/mapper/ParentJoinFieldMapper.java @@ -37,6 +37,7 @@ import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.ParseContext; import org.elasticsearch.index.mapper.StringFieldType; @@ -51,7 +52,10 @@ import java.util.Set; /** * A {@link FieldMapper} that creates hierarchical joins (parent-join) between documents in the same index. - * TODO Should be restricted to a single join field per index + * Only one parent-join field can be defined per index. The verification of this assumption is done + * through the {@link MetaJoinFieldMapper} which declares a meta field called "_parent_join". + * This field is only used to ensure that there is a single parent-join field defined in the mapping and + * cannot be used to index or query any data. */ public final class ParentJoinFieldMapper extends FieldMapper { public static final String NAME = "join"; @@ -69,11 +73,21 @@ public final class ParentJoinFieldMapper extends FieldMapper { } } - static String getParentIdFieldName(String joinFieldName, String parentName) { + /** + * Returns the {@link ParentJoinFieldMapper} associated with the service or null + * if there is no parent-join field in this mapping. + */ + public static ParentJoinFieldMapper getMapper(MapperService service) { + MetaJoinFieldMapper.MetaJoinFieldType fieldType = + (MetaJoinFieldMapper.MetaJoinFieldType) service.fullName(MetaJoinFieldMapper.NAME); + return fieldType == null ? null : fieldType.mapper; + } + + private static String getParentIdFieldName(String joinFieldName, String parentName) { return joinFieldName + "#" + parentName; } - static void checkPreConditions(Version indexCreatedVersion, ContentPath path, String name) { + private static void checkPreConditions(Version indexCreatedVersion, ContentPath path, String name) { if (indexCreatedVersion.before(Version.V_6_0_0_alpha2)) { throw new IllegalStateException("unable to create join field [" + name + "] for index created before " + Version.V_6_0_0_alpha2); @@ -85,7 +99,7 @@ public final class ParentJoinFieldMapper extends FieldMapper { } } - static void checkParentFields(String name, List mappers) { + private static void checkParentFields(String name, List mappers) { Set children = new HashSet<>(); List conflicts = new ArrayList<>(); for (ParentIdFieldMapper mapper : mappers) { @@ -100,16 +114,10 @@ public final class ParentJoinFieldMapper extends FieldMapper { } } - static void checkDuplicateJoinFields(ParseContext.Document doc) { - if (doc.getFields().stream().anyMatch((m) -> m.fieldType() instanceof JoinFieldType)) { - throw new IllegalStateException("cannot have two join fields in the same document"); - } - } - - public static class Builder extends FieldMapper.Builder { + static class Builder extends FieldMapper.Builder { final List parentIdFieldBuilders = new ArrayList<>(); - public Builder(String name) { + Builder(String name) { super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE); builder = this; } @@ -132,8 +140,9 @@ public final class ParentJoinFieldMapper extends FieldMapper { final List parentIdFields = new ArrayList<>(); parentIdFieldBuilders.stream().map((e) -> e.build(context)).forEach(parentIdFields::add); checkParentFields(name(), parentIdFields); + MetaJoinFieldMapper unique = new MetaJoinFieldMapper.Builder().build(context); return new ParentJoinFieldMapper(name, fieldType, context.indexSettings(), - Collections.unmodifiableList(parentIdFields)); + unique, Collections.unmodifiableList(parentIdFields)); } } @@ -214,14 +223,19 @@ public final class ParentJoinFieldMapper extends FieldMapper { } } + // The meta field that ensures that there is no other parent-join in the mapping + private MetaJoinFieldMapper uniqueFieldMapper; private List parentIdFields; protected ParentJoinFieldMapper(String simpleName, MappedFieldType fieldType, Settings indexSettings, + MetaJoinFieldMapper uniqueFieldMapper, List parentIdFields) { super(simpleName, fieldType, Defaults.FIELD_TYPE, indexSettings, MultiFields.empty(), null); this.parentIdFields = parentIdFields; + this.uniqueFieldMapper = uniqueFieldMapper; + this.uniqueFieldMapper.setFieldMapper(this); } @Override @@ -241,7 +255,9 @@ public final class ParentJoinFieldMapper extends FieldMapper { @Override public Iterator iterator() { - return parentIdFields.stream().map((field) -> (Mapper) field).iterator(); + List mappers = new ArrayList<> (parentIdFields); + mappers.add(uniqueFieldMapper); + return mappers.iterator(); } /** @@ -305,7 +321,7 @@ public final class ParentJoinFieldMapper extends FieldMapper { conflicts.add("cannot remove child [" + child + "] in join field [" + name() + "]"); } } - ParentIdFieldMapper merged = (ParentIdFieldMapper) self.merge(mergeWithMapper, false); + ParentIdFieldMapper merged = (ParentIdFieldMapper) self.merge(mergeWithMapper, updateAllTypes); newParentIdFields.add(merged); } } @@ -313,6 +329,8 @@ public final class ParentJoinFieldMapper extends FieldMapper { throw new IllegalStateException("invalid update for join field [" + name() + "]:\n" + conflicts.toString()); } this.parentIdFields = Collections.unmodifiableList(newParentIdFields); + this.uniqueFieldMapper = (MetaJoinFieldMapper) uniqueFieldMapper.merge(joinMergeWith.uniqueFieldMapper, updateAllTypes); + uniqueFieldMapper.setFieldMapper(this); } @Override @@ -323,6 +341,8 @@ public final class ParentJoinFieldMapper extends FieldMapper { newMappers.add((ParentIdFieldMapper) mapper.updateFieldType(fullNameToFieldType)); } fieldMapper.parentIdFields = Collections.unmodifiableList(newMappers); + this.uniqueFieldMapper = (MetaJoinFieldMapper) uniqueFieldMapper.updateFieldType(fullNameToFieldType); + uniqueFieldMapper.setFieldMapper(this); return fieldMapper; } @@ -333,9 +353,6 @@ public final class ParentJoinFieldMapper extends FieldMapper { @Override public Mapper parse(ParseContext context) throws IOException { - // Only one join field per document - checkDuplicateJoinFields(context.doc()); - context.path().add(simpleName()); XContentParser.Token token = context.parser().currentToken(); String name = null; diff --git a/modules/parent-join/src/test/java/org/elasticsearch/join/mapper/ParentJoinFieldMapperTests.java b/modules/parent-join/src/test/java/org/elasticsearch/join/mapper/ParentJoinFieldMapperTests.java index b0c5eb8e680..161a6f39922 100644 --- a/modules/parent-join/src/test/java/org/elasticsearch/join/mapper/ParentJoinFieldMapperTests.java +++ b/modules/parent-join/src/test/java/org/elasticsearch/join/mapper/ParentJoinFieldMapperTests.java @@ -23,6 +23,7 @@ import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.mapper.DocumentFieldMappers; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.MapperException; import org.elasticsearch.index.mapper.MapperParsingException; @@ -53,8 +54,10 @@ public class ParentJoinFieldMapperTests extends ESSingleNodeTestCase { .endObject() .endObject() .endObject().string(); - DocumentMapper docMapper = createIndex("test") - .mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); + IndexService service = createIndex("test"); + DocumentMapper docMapper = service.mapperService().merge("type", new CompressedXContent(mapping), + MapperService.MergeReason.MAPPING_UPDATE, false); + assertTrue(docMapper.mappers().getMapper("join_field") == ParentJoinFieldMapper.getMapper(service.mapperService())); // Doc without join ParsedDocument doc = docMapper.parse(SourceToParse.source("test", "type", "0", @@ -99,8 +102,10 @@ public class ParentJoinFieldMapperTests extends ESSingleNodeTestCase { .endObject() .endObject() .endObject().string(); - DocumentMapper docMapper = createIndex("test").mapperService() - .documentMapperParser().parse("type", new CompressedXContent(mapping)); + IndexService service = createIndex("test"); + DocumentMapper docMapper = service.mapperService().merge("type", new CompressedXContent(mapping), + MapperService.MergeReason.MAPPING_UPDATE, false); + assertTrue(docMapper.mappers().getMapper("join_field") == ParentJoinFieldMapper.getMapper(service.mapperService())); // Doc without join ParsedDocument doc = docMapper.parse(SourceToParse.source("test", "type", "0", @@ -176,8 +181,9 @@ public class ParentJoinFieldMapperTests extends ESSingleNodeTestCase { .endObject() .endObject().endObject().string(); IndexService indexService = createIndex("test"); - indexService.mapperService().merge("type", new CompressedXContent(mapping), + DocumentMapper docMapper = indexService.mapperService().merge("type", new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE, false); + assertTrue(docMapper.mappers().getMapper("join_field") == ParentJoinFieldMapper.getMapper(indexService.mapperService())); { final String updateMapping = XContentFactory.jsonBuilder().startObject().startObject("properties") @@ -244,10 +250,10 @@ public class ParentJoinFieldMapperTests extends ESSingleNodeTestCase { .array("child", "grand_child1", "grand_child2") .endObject() .endObject().endObject().string(); - indexService.mapperService().merge("type", new CompressedXContent(updateMapping), + docMapper = indexService.mapperService().merge("type", new CompressedXContent(updateMapping), MapperService.MergeReason.MAPPING_UPDATE, true); - ParentJoinFieldMapper mapper = (ParentJoinFieldMapper) indexService.mapperService() - .docMappers(false).iterator().next().mappers().getMapper("join_field"); + assertTrue(docMapper.mappers().getMapper("join_field") == ParentJoinFieldMapper.getMapper(indexService.mapperService())); + ParentJoinFieldMapper mapper = ParentJoinFieldMapper.getMapper(indexService.mapperService()); assertTrue(mapper.hasChild("child2")); assertFalse(mapper.hasParent("child2")); assertTrue(mapper.hasChild("grand_child2")); @@ -263,10 +269,10 @@ public class ParentJoinFieldMapperTests extends ESSingleNodeTestCase { .array("other", "child_other1", "child_other2") .endObject() .endObject().endObject().string(); - indexService.mapperService().merge("type", new CompressedXContent(updateMapping), + docMapper = indexService.mapperService().merge("type", new CompressedXContent(updateMapping), MapperService.MergeReason.MAPPING_UPDATE, true); - ParentJoinFieldMapper mapper = (ParentJoinFieldMapper) indexService.mapperService() - .docMappers(false).iterator().next().mappers().getMapper("join_field"); + assertTrue(docMapper.mappers().getMapper("join_field") == ParentJoinFieldMapper.getMapper(indexService.mapperService())); + ParentJoinFieldMapper mapper = ParentJoinFieldMapper.getMapper(indexService.mapperService()); assertTrue(mapper.hasParent("other")); assertFalse(mapper.hasChild("other")); assertTrue(mapper.hasChild("child_other1")); @@ -316,38 +322,48 @@ public class ParentJoinFieldMapperTests extends ESSingleNodeTestCase { } public void testMultipleJoinFields() throws Exception { - String mapping = XContentFactory.jsonBuilder().startObject() - .startObject("properties") - .startObject("join_field") - .field("type", "join") - .field("parent", "child") - .field("child", "grand_child") + IndexService indexService = createIndex("test"); + { + String mapping = XContentFactory.jsonBuilder().startObject() + .startObject("properties") + .startObject("join_field") + .field("type", "join") + .field("parent", "child") + .field("child", "grand_child") + .endObject() + .startObject("another_join_field") + .field("type", "join") + .field("product", "item") + .endObject() .endObject() - .startObject("another_join_field") - .field("type", "join") - .field("product", "item") + .endObject().string(); + IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> indexService.mapperService().merge("type", + new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE, false)); + assertThat(exc.getMessage(), containsString("Field [_parent_join] is defined twice in [type]")); + } + + { + String mapping = XContentFactory.jsonBuilder().startObject() + .startObject("properties") + .startObject("join_field") + .field("type", "join") + .field("parent", "child") + .field("child", "grand_child") + .endObject() .endObject() - .endObject() - .endObject().string(); - DocumentMapper docMapper = createIndex("test").mapperService() - .documentMapperParser().parse("type", new CompressedXContent(mapping)); - - // Doc without join - ParsedDocument doc = docMapper.parse(SourceToParse.source("test", "type", "0", - XContentFactory.jsonBuilder().startObject().endObject().bytes(), XContentType.JSON)); - assertNull(doc.rootDoc().getBinaryValue("join_field")); - - // Doc parent - MapperParsingException exc = expectThrows(MapperParsingException.class, - () -> docMapper.parse(SourceToParse.source("test", "type", "1", - XContentFactory.jsonBuilder() - .startObject() - .field("join_field", "parent") - .startObject("another_join_field") - .field("name", "item") - .field("parent", "0") - .endObject() - .endObject().bytes(), XContentType.JSON))); - assertThat(exc.getRootCause().getMessage(), containsString("cannot have two join fields in the same document")); + .endObject().string(); + indexService.mapperService().merge("type", + new CompressedXContent(mapping), MapperService.MergeReason.MAPPING_UPDATE, false); + String updateMapping = XContentFactory.jsonBuilder().startObject() + .startObject("properties") + .startObject("another_join_field") + .field("type", "join") + .endObject() + .endObject() + .endObject().string(); + IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> indexService.mapperService().merge("type", + new CompressedXContent(updateMapping), MapperService.MergeReason.MAPPING_UPDATE, false)); + assertThat(exc.getMessage(), containsString("Field [_parent_join] is defined twice in [type]")); + } } }