From 99b32901d29ac749cb261a866d8a95a9479ebab5 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 29 Jul 2014 11:27:37 +0200 Subject: [PATCH] Mappings: Fix `copy_to` behavior on nested documents. Today, `copy_to` always copies a field to the current document, which is often wrong in the case of nested documents. For example, if you have a nested field called `n` which has a sub-field `n.source` whose content should be copied to `target`, then the latter field should be created in the root document instead of the nested one, since it doesn't have `n.` as a prefix. On the contrary, if you configure the destination field to be `n.target`, then it should go to the nested document. Close #6701 --- .../index/mapper/ParseContext.java | 81 +++++++++++--- .../mapper/core/AbstractFieldMapper.java | 30 +++-- .../index/mapper/object/ObjectMapper.java | 22 ++-- .../mapper/copyto/CopyToMapperTests.java | 105 ++++++++++++++++++ 4 files changed, 205 insertions(+), 33 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/ParseContext.java b/src/main/java/org/elasticsearch/index/mapper/ParseContext.java index fcbe9dd89df..0b3284cce34 100644 --- a/src/main/java/org/elasticsearch/index/mapper/ParseContext.java +++ b/src/main/java/org/elasticsearch/index/mapper/ParseContext.java @@ -47,11 +47,42 @@ public abstract class ParseContext { /** Fork of {@link org.apache.lucene.document.Document} with additional functionality. */ public static class Document implements Iterable { + private final Document parent; + private final String path; + private final String prefix; private final List fields; private ObjectObjectMap keyedFields; - public Document() { + private Document(String path, Document parent) { fields = Lists.newArrayList(); + this.path = path; + this.prefix = path.isEmpty() ? "" : path + "."; + this.parent = parent; + } + + public Document() { + this("", null); + } + + /** + * Return the path associated with this document. + */ + public String getPath() { + return path; + } + + /** + * Return a prefix that all fields in this document should have. + */ + public String getPrefix() { + return prefix; + } + + /** + * Return the parent document, or null if this is the root document. + */ + public Document getParent() { + return parent; } @Override @@ -64,6 +95,8 @@ public abstract class ParseContext { } public void add(IndexableField field) { + // either a meta fields or starts with the prefix + assert field.name().startsWith("_") || field.name().startsWith(prefix) : field.name() + " " + prefix; fields.add(field); } @@ -240,11 +273,6 @@ public abstract class ParseContext { in.addDoc(doc); } - @Override - public Document switchDoc(Document doc) { - return in.switchDoc(doc); - } - @Override public RootObjectMapper root() { return in.root(); @@ -497,12 +525,6 @@ public abstract class ParseContext { this.documents.add(doc); } - public Document switchDoc(Document doc) { - Document prev = this.document; - this.document = doc; - return prev; - } - public RootObjectMapper root() { return docMapper.root(); } @@ -625,6 +647,39 @@ public abstract class ParseContext { }; } + /** + * Return a new context that will be used within a nested document. + */ + public final ParseContext createNestedContext(String fullPath) { + final Document doc = new Document(fullPath, doc()); + addDoc(doc); + return switchDoc(doc); + } + + /** + * Return a new context that has the provided document as the current document. + */ + public final ParseContext switchDoc(final Document document) { + return new FilterParseContext(this) { + @Override + public Document doc() { + return document; + } + }; + } + + /** + * Return a new context that will have the provided path. + */ + public final ParseContext overridePath(final ContentPath path) { + return new FilterParseContext(this) { + @Override + public ContentPath path() { + return path; + } + }; + } + public boolean isWithinMultiFields() { return false; } @@ -657,8 +712,6 @@ public abstract class ParseContext { public abstract void addDoc(Document doc); - public abstract Document switchDoc(Document doc); - public abstract RootObjectMapper root(); public abstract DocumentMapper docMapper(); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java index fc556f18f49..ed951b9cf20 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java @@ -51,6 +51,7 @@ import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider; import org.elasticsearch.index.codec.postingsformat.PostingsFormatService; import org.elasticsearch.index.fielddata.FieldDataType; import org.elasticsearch.index.mapper.*; +import org.elasticsearch.index.mapper.ParseContext.Document; import org.elasticsearch.index.mapper.internal.AllFieldMapper; import org.elasticsearch.index.mapper.object.ObjectMapper; import org.elasticsearch.index.query.QueryParseContext; @@ -1014,9 +1015,26 @@ public abstract class AbstractFieldMapper implements FieldMapper { * Creates instances of the fields that the current field should be copied to */ public void parse(ParseContext context) throws IOException { - if (!context.isWithinCopyTo()) { + if (!context.isWithinCopyTo() && copyToFields.isEmpty() == false) { + context = context.createCopyToContext(); for (String field : copyToFields) { - parse(field, context); + // In case of a hierarchy of nested documents, we need to figure out + // which document the field should go to + Document targetDoc = null; + for (Document doc = context.doc(); doc != null; doc = doc.getParent()) { + if (field.startsWith(doc.getPrefix())) { + targetDoc = doc; + break; + } + } + assert targetDoc != null; + final ParseContext copyToContext; + if (targetDoc == context.doc()) { + copyToContext = context; + } else { + copyToContext = context.switchDoc(targetDoc); + } + parse(field, copyToContext); } } } @@ -1053,11 +1071,13 @@ public abstract class AbstractFieldMapper implements FieldMapper { * Creates an copy of the current field with given field name and boost */ public void parse(String field, ParseContext context) throws IOException { - context = context.createCopyToContext(); FieldMappers mappers = context.docMapper().mappers().indexName(field); if (mappers != null && !mappers.isEmpty()) { mappers.mapper().parse(context); } else { + // The path of the dest field might be completely different from the current one so we need to reset it + context = context.overridePath(new ContentPath(0)); + int posDot = field.lastIndexOf('.'); if (posDot > 0) { // Compound name @@ -1069,8 +1089,6 @@ public abstract class AbstractFieldMapper implements FieldMapper { throw new MapperParsingException("attempt to copy value to non-existing object [" + field + "]"); } - ContentPath.Type origPathType = context.path().pathType(); - context.path().pathType(ContentPath.Type.FULL); context.path().add(objectPath); // We might be in dynamically created field already, so need to clean withinNewMapper flag @@ -1086,8 +1104,6 @@ public abstract class AbstractFieldMapper implements FieldMapper { } else { context.clearWithinNewMapper(); } - context.path().remove(); - context.path().pathType(origPathType); } } else { diff --git a/src/main/java/org/elasticsearch/index/mapper/object/ObjectMapper.java b/src/main/java/org/elasticsearch/index/mapper/object/ObjectMapper.java index beae08b5dd6..92249324292 100644 --- a/src/main/java/org/elasticsearch/index/mapper/object/ObjectMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/object/ObjectMapper.java @@ -19,7 +19,6 @@ package org.elasticsearch.index.mapper.object; -import com.carrotsearch.hppc.cursors.ObjectObjectCursor; import com.google.common.collect.Iterables; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexableField; @@ -31,11 +30,9 @@ import org.elasticsearch.ElasticsearchIllegalStateException; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; -import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.collect.UpdateInPlaceMap; import org.elasticsearch.common.joda.FormatDateTimeFormatter; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; @@ -454,11 +451,12 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll { throw new MapperParsingException("object mapping for [" + name + "] tried to parse as object, but found a concrete value"); } - Document restoreDoc = null; if (nested.isNested()) { - Document nestedDoc = new Document(); + context = context.createNestedContext(fullPath); + Document nestedDoc = context.doc(); + Document parentDoc = nestedDoc.getParent(); // pre add the uid field if possible (id was already provided) - IndexableField uidField = context.doc().getField(UidFieldMapper.NAME); + IndexableField uidField = parentDoc.getField(UidFieldMapper.NAME); if (uidField != null) { // we don't need to add it as a full uid field in nested docs, since we don't need versioning // we also rely on this for UidField#loadVersion @@ -470,8 +468,6 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll { // note, we don't prefix it with the type of the doc since it allows us to execute a nested query // across types (for example, with similar nested objects) nestedDoc.add(new Field(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE)); - restoreDoc = context.switchDoc(nestedDoc); - context.addDoc(nestedDoc); } ContentPath.Type origPathType = context.path().pathType(); @@ -505,24 +501,26 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll { // restore the enable path flag context.path().pathType(origPathType); if (nested.isNested()) { - Document nestedDoc = context.switchDoc(restoreDoc); + Document nestedDoc = context.doc(); + Document parentDoc = nestedDoc.getParent(); if (nested.isIncludeInParent()) { for (IndexableField field : nestedDoc.getFields()) { if (field.name().equals(UidFieldMapper.NAME) || field.name().equals(TypeFieldMapper.NAME)) { continue; } else { - context.doc().add(field); + parentDoc.add(field); } } } if (nested.isIncludeInRoot()) { + Document rootDoc = context.rootDoc(); // don't add it twice, if its included in parent, and we are handling the master doc... - if (!(nested.isIncludeInParent() && context.doc() == context.rootDoc())) { + if (!nested.isIncludeInParent() || parentDoc != rootDoc) { for (IndexableField field : nestedDoc.getFields()) { if (field.name().equals(UidFieldMapper.NAME) || field.name().equals(TypeFieldMapper.NAME)) { continue; } else { - context.rootDoc().add(field); + rootDoc.add(field); } } } diff --git a/src/test/java/org/elasticsearch/index/mapper/copyto/CopyToMapperTests.java b/src/test/java/org/elasticsearch/index/mapper/copyto/CopyToMapperTests.java index 64fce309d54..ae108a9ad54 100644 --- a/src/test/java/org/elasticsearch/index/mapper/copyto/CopyToMapperTests.java +++ b/src/test/java/org/elasticsearch/index/mapper/copyto/CopyToMapperTests.java @@ -20,13 +20,17 @@ package org.elasticsearch.index.mapper.copyto; import com.google.common.collect.ImmutableList; +import org.apache.lucene.index.IndexableField; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.index.mapper.*; +import org.elasticsearch.index.mapper.ParseContext.Document; import org.elasticsearch.index.mapper.core.LongFieldMapper; import org.elasticsearch.index.mapper.core.StringFieldMapper; +import org.elasticsearch.index.service.IndexService; import org.elasticsearch.test.ElasticsearchSingleNodeTest; import org.junit.Test; @@ -223,4 +227,105 @@ public class CopyToMapperTests extends ElasticsearchSingleNodeTest { assertThat(fields.get(1), equalTo("bar")); } + public void testCopyToNestedField() throws Exception { + IndexService indexService = createIndex("test"); + DocumentMapperParser parser = indexService.mapperService().documentMapperParser(); + for (boolean mapped : new boolean[] {true, false}) { + XContentBuilder mapping = jsonBuilder().startObject() + .startObject("type") + .startObject("properties") + .startObject("n1") + .field("type", "nested") + .startObject("properties") + .startObject("n2") + .field("type", "nested") + .startObject("properties") + .startObject("source") + .field("type", "long") + .startArray("copy_to") + .value("target") // should go to the root doc + .value("n1.target") // should go to the parent doc + .value("n1.n2.target") // should go to the current doc + .endArray() + .endObject(); + for (int i = 0; i < 3; ++i) { + if (mapped) { + mapping = mapping.startObject("target").field("type", "long").endObject(); + } + mapping = mapping.endObject().endObject(); + } + mapping = mapping.endObject(); + + DocumentMapper mapper = parser.parse(mapping.string()); + + XContentBuilder jsonDoc = XContentFactory.jsonBuilder() + .startObject() + .startArray("n1") + .startObject() + .startArray("n2") + .startObject() + .field("source", 3) + .endObject() + .startObject() + .field("source", 5) + .endObject() + .endArray() + .endObject() + .startObject() + .startArray("n2") + .startObject() + .field("source", 7) + .endObject() + .endArray() + .endObject() + .endArray() + .endObject(); + + ParsedDocument doc = mapper.parse("type", "1", jsonDoc.bytes()); + assertEquals(6, doc.docs().size()); + + Document nested = doc.docs().get(0); + assertFieldValue(nested, "n1.n2.target", 7L); + assertFieldValue(nested, "n1.target"); + assertFieldValue(nested, "target"); + + nested = doc.docs().get(2); + assertFieldValue(nested, "n1.n2.target", 5L); + assertFieldValue(nested, "n1.target"); + assertFieldValue(nested, "target"); + + nested = doc.docs().get(3); + assertFieldValue(nested, "n1.n2.target", 3L); + assertFieldValue(nested, "n1.target"); + assertFieldValue(nested, "target"); + + Document parent = doc.docs().get(1); + assertFieldValue(parent, "target"); + assertFieldValue(parent, "n1.target", 7L); + assertFieldValue(parent, "n1.n2.target"); + + parent = doc.docs().get(4); + assertFieldValue(parent, "target"); + assertFieldValue(parent, "n1.target", 3L, 5L); + assertFieldValue(parent, "n1.n2.target"); + + Document root = doc.docs().get(5); + assertFieldValue(root, "target", 3L, 5L, 7L); + assertFieldValue(root, "n1.target"); + assertFieldValue(root, "n1.n2.target"); + } + } + + private void assertFieldValue(Document doc, String field, Number... expected) { + IndexableField[] values = doc.getFields(field); + if (values == null) { + values = new IndexableField[0]; + } + Number[] actual = new Number[values.length]; + for (int i = 0; i < values.length; ++i) { + actual[i] = values[i].numericValue(); + } + assertArrayEquals(expected, actual); + } + }