Mappings: Fix `copy_to` behavior on nested documents.

Today, `copy_to` always copies a field to the current document, which is often
wrong in the case of nested documents. For example, if you have a nested field
called `n` with a sub-field `n.source` whose content should be copied to
`target`, then `target` should be created in the root document rather than in
the nested one, since it does not have `n.` as a prefix. Conversely, if you
configure the destination field to be `n.target`, then it should go to the
nested document.

Close #6701
Adrien Grand 2014-07-29 11:27:37 +02:00
parent 1d581e6286
commit 99b32901d2
4 changed files with 205 additions and 33 deletions
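
For orientation, a minimal sketch of the behavior described in the commit message (illustrative only, not part of the commit; the test at the end exercises a two-level variant): a nested field `n` with a sub-field `n.source` that copies to both the root-level `target` and the nested `n.target`.

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

import org.elasticsearch.common.xcontent.XContentBuilder;

class CopyToNestedMappingSketch {
    static XContentBuilder mapping() throws Exception {
        return jsonBuilder().startObject()
            .startObject("type").startObject("properties")
                .startObject("n")
                    .field("type", "nested")
                    .startObject("properties")
                        .startObject("source")
                            .field("type", "long")
                            .startArray("copy_to")
                                .value("target")   // no "n." prefix -> created in the root document
                                .value("n.target") // "n." prefix -> created in the nested document
                            .endArray()
                        .endObject()
                        .startObject("target").field("type", "long").endObject()
                    .endObject()
                .endObject()
                .startObject("target").field("type", "long").endObject()
            .endObject().endObject()
        .endObject();
    }
}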

File: org/elasticsearch/index/mapper/ParseContext.java

@@ -47,11 +47,42 @@ public abstract class ParseContext {
/** Fork of {@link org.apache.lucene.document.Document} with additional functionality. */
public static class Document implements Iterable<IndexableField> {
private final Document parent;
private final String path;
private final String prefix;
private final List<IndexableField> fields;
private ObjectObjectMap<Object, IndexableField> keyedFields;
public Document() {
private Document(String path, Document parent) {
fields = Lists.newArrayList();
this.path = path;
this.prefix = path.isEmpty() ? "" : path + ".";
this.parent = parent;
}
public Document() {
this("", null);
}
/**
* Return the path associated with this document.
*/
public String getPath() {
return path;
}
/**
* Return a prefix that all fields in this document should have.
*/
public String getPrefix() {
return prefix;
}
/**
* Return the parent document, or null if this is the root document.
*/
public Document getParent() {
return parent;
}
@Override
@@ -64,6 +95,8 @@ public abstract class ParseContext {
}
public void add(IndexableField field) {
// either a meta field or starts with the prefix
assert field.name().startsWith("_") || field.name().startsWith(prefix) : field.name() + " " + prefix;
fields.add(field);
}
@@ -240,11 +273,6 @@ public abstract class ParseContext {
in.addDoc(doc);
}
@Override
public Document switchDoc(Document doc) {
return in.switchDoc(doc);
}
@Override
public RootObjectMapper root() {
return in.root();
@@ -497,12 +525,6 @@ public abstract class ParseContext {
this.documents.add(doc);
}
public Document switchDoc(Document doc) {
Document prev = this.document;
this.document = doc;
return prev;
}
public RootObjectMapper root() {
return docMapper.root();
}
@@ -625,6 +647,39 @@ public abstract class ParseContext {
};
}
/**
* Return a new context that will be used within a nested document.
*/
public final ParseContext createNestedContext(String fullPath) {
final Document doc = new Document(fullPath, doc());
addDoc(doc);
return switchDoc(doc);
}
/**
* Return a new context that has the provided document as the current document.
*/
public final ParseContext switchDoc(final Document document) {
return new FilterParseContext(this) {
@Override
public Document doc() {
return document;
}
};
}
/**
* Return a new context that will have the provided path.
*/
public final ParseContext overridePath(final ContentPath path) {
return new FilterParseContext(this) {
@Override
public ContentPath path() {
return path;
}
};
}
public boolean isWithinMultiFields() {
return false;
}
@@ -657,8 +712,6 @@ public abstract class ParseContext {
public abstract void addDoc(Document doc);
public abstract Document switchDoc(Document doc);
public abstract RootObjectMapper root();
public abstract DocumentMapper docMapper();
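
A hedged usage sketch of the new context helpers (hypothetical caller, not part of the diff): createNestedContext enters a nested scope, while switchDoc and overridePath let copy_to redirect a value to an ancestor document and rebuild the destination field name from an empty path.

import org.elasticsearch.index.mapper.ContentPath;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.ParseContext.Document;

class NestedContextSketch {
    // Enter the nested scope "n1": the returned context exposes the newly added
    // nested document through doc(), with the enclosing document as its parent.
    static ParseContext enterNested(ParseContext context) {
        ParseContext nestedContext = context.createNestedContext("n1");
        assert nestedContext.doc().getParent() == context.doc();
        assert nestedContext.doc().getPrefix().equals("n1.");
        return nestedContext;
    }

    // Redirect a copy_to value to an ancestor document and reset the path so the
    // destination field name is resolved from the root of the source document.
    static ParseContext redirectCopyTo(ParseContext context, Document targetDoc) {
        return context.switchDoc(targetDoc).overridePath(new ContentPath(0));
    }
}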

File: org/elasticsearch/index/mapper/core/AbstractFieldMapper.java

@@ -51,6 +51,7 @@ import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatService;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.ParseContext.Document;
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
import org.elasticsearch.index.mapper.object.ObjectMapper;
import org.elasticsearch.index.query.QueryParseContext;
@@ -1014,9 +1015,26 @@ public abstract class AbstractFieldMapper<T> implements FieldMapper<T> {
* Creates instances of the fields that the current field should be copied to
*/
public void parse(ParseContext context) throws IOException {
if (!context.isWithinCopyTo()) {
if (!context.isWithinCopyTo() && copyToFields.isEmpty() == false) {
context = context.createCopyToContext();
for (String field : copyToFields) {
parse(field, context);
// In case of a hierarchy of nested documents, we need to figure out
// which document the field should go to
Document targetDoc = null;
for (Document doc = context.doc(); doc != null; doc = doc.getParent()) {
if (field.startsWith(doc.getPrefix())) {
targetDoc = doc;
break;
}
}
assert targetDoc != null;
final ParseContext copyToContext;
if (targetDoc == context.doc()) {
copyToContext = context;
} else {
copyToContext = context.switchDoc(targetDoc);
}
parse(field, copyToContext);
}
}
}
@@ -1053,11 +1071,13 @@ public abstract class AbstractFieldMapper<T> implements FieldMapper<T> {
* Creates a copy of the current field with the given field name and boost
*/
public void parse(String field, ParseContext context) throws IOException {
context = context.createCopyToContext();
FieldMappers mappers = context.docMapper().mappers().indexName(field);
if (mappers != null && !mappers.isEmpty()) {
mappers.mapper().parse(context);
} else {
// The path of the dest field might be completely different from the current one so we need to reset it
context = context.overridePath(new ContentPath(0));
int posDot = field.lastIndexOf('.');
if (posDot > 0) {
// Compound name
@@ -1069,8 +1089,6 @@ public abstract class AbstractFieldMapper<T> implements FieldMapper<T> {
throw new MapperParsingException("attempt to copy value to non-existing object [" + field + "]");
}
ContentPath.Type origPathType = context.path().pathType();
context.path().pathType(ContentPath.Type.FULL);
context.path().add(objectPath);
// We might be in dynamically created field already, so need to clean withinNewMapper flag
@@ -1086,8 +1104,6 @@ public abstract class AbstractFieldMapper<T> implements FieldMapper<T> {
} else {
context.clearWithinNewMapper();
}
context.path().remove();
context.path().pathType(origPathType);
}
} else {
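
Restating the resolution step above as a standalone sketch (hypothetical helper; the commit inlines this loop in parse): walk up from the current document until one whose prefix matches the copy_to destination is found.

import org.elasticsearch.index.mapper.ParseContext.Document;

class CopyToTargetSketch {
    // "target" matches the root document (empty prefix), "n1.target" matches the
    // nested document created for "n1", and "n1.n2.target" stays in the document
    // created for "n1.n2". The innermost matching ancestor wins because we walk
    // from the current document upwards.
    static Document resolveTargetDoc(Document current, String field) {
        for (Document doc = current; doc != null; doc = doc.getParent()) {
            if (field.startsWith(doc.getPrefix())) {
                return doc;
            }
        }
        return null; // unreachable in practice: the root document's prefix is ""
    }
}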

File: org/elasticsearch/index/mapper/object/ObjectMapper.java

@@ -19,7 +19,6 @@
package org.elasticsearch.index.mapper.object;
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import com.google.common.collect.Iterables;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexableField;
@@ -31,11 +30,9 @@ import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.collect.UpdateInPlaceMap;
import org.elasticsearch.common.joda.FormatDateTimeFormatter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
@@ -454,11 +451,12 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll {
throw new MapperParsingException("object mapping for [" + name + "] tried to parse as object, but found a concrete value");
}
Document restoreDoc = null;
if (nested.isNested()) {
Document nestedDoc = new Document();
context = context.createNestedContext(fullPath);
Document nestedDoc = context.doc();
Document parentDoc = nestedDoc.getParent();
// pre add the uid field if possible (id was already provided)
IndexableField uidField = context.doc().getField(UidFieldMapper.NAME);
IndexableField uidField = parentDoc.getField(UidFieldMapper.NAME);
if (uidField != null) {
// we don't need to add it as a full uid field in nested docs, since we don't need versioning
// we also rely on this for UidField#loadVersion
@@ -470,8 +468,6 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll {
// note, we don't prefix it with the type of the doc since it allows us to execute a nested query
// across types (for example, with similar nested objects)
nestedDoc.add(new Field(TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE));
restoreDoc = context.switchDoc(nestedDoc);
context.addDoc(nestedDoc);
}
ContentPath.Type origPathType = context.path().pathType();
@@ -505,24 +501,26 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll {
// restore the enable path flag
context.path().pathType(origPathType);
if (nested.isNested()) {
Document nestedDoc = context.switchDoc(restoreDoc);
Document nestedDoc = context.doc();
Document parentDoc = nestedDoc.getParent();
if (nested.isIncludeInParent()) {
for (IndexableField field : nestedDoc.getFields()) {
if (field.name().equals(UidFieldMapper.NAME) || field.name().equals(TypeFieldMapper.NAME)) {
continue;
} else {
context.doc().add(field);
parentDoc.add(field);
}
}
}
if (nested.isIncludeInRoot()) {
Document rootDoc = context.rootDoc();
// don't add it twice, if it's included in parent, and we are handling the master doc...
if (!(nested.isIncludeInParent() && context.doc() == context.rootDoc())) {
if (!nested.isIncludeInParent() || parentDoc != rootDoc) {
for (IndexableField field : nestedDoc.getFields()) {
if (field.name().equals(UidFieldMapper.NAME) || field.name().equals(TypeFieldMapper.NAME)) {
continue;
} else {
context.rootDoc().add(field);
rootDoc.add(field);
}
}
}
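
Side note (not part of the commit): the rewritten include_in_root guard is simply the De Morgan expansion of the old negated conjunction, expressed with the new parentDoc/rootDoc locals. A hypothetical restatement:

class IncludeInRootGuardSketch {
    // Copy nested fields into the root document unless include_in_parent has
    // already copied them there, i.e. unless the parent document is the root.
    static boolean shouldCopyToRoot(boolean includeInParent, Object parentDoc, Object rootDoc) {
        return !includeInParent || parentDoc != rootDoc; // == !(includeInParent && parentDoc == rootDoc)
    }
}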

File: org/elasticsearch/index/mapper/copyto/CopyToMapperTests.java

@@ -20,13 +20,17 @@
package org.elasticsearch.index.mapper.copyto;
import com.google.common.collect.ImmutableList;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.ParseContext.Document;
import org.elasticsearch.index.mapper.core.LongFieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.index.service.IndexService;
import org.elasticsearch.test.ElasticsearchSingleNodeTest;
import org.junit.Test;
@@ -223,4 +227,105 @@ public class CopyToMapperTests extends ElasticsearchSingleNodeTest {
assertThat(fields.get(1), equalTo("bar"));
}
public void testCopyToNestedField() throws Exception {
IndexService indexService = createIndex("test");
DocumentMapperParser parser = indexService.mapperService().documentMapperParser();
for (boolean mapped : new boolean[] {true, false}) {
XContentBuilder mapping = jsonBuilder().startObject()
.startObject("type")
.startObject("properties")
.startObject("n1")
.field("type", "nested")
.startObject("properties")
.startObject("n2")
.field("type", "nested")
.startObject("properties")
.startObject("source")
.field("type", "long")
.startArray("copy_to")
.value("target") // should go to the root doc
.value("n1.target") // should go to the parent doc
.value("n1.n2.target") // should go to the current doc
.endArray()
.endObject();
for (int i = 0; i < 3; ++i) {
if (mapped) {
mapping = mapping.startObject("target").field("type", "long").endObject();
}
mapping = mapping.endObject().endObject();
}
mapping = mapping.endObject();
DocumentMapper mapper = parser.parse(mapping.string());
XContentBuilder jsonDoc = XContentFactory.jsonBuilder()
.startObject()
.startArray("n1")
.startObject()
.startArray("n2")
.startObject()
.field("source", 3)
.endObject()
.startObject()
.field("source", 5)
.endObject()
.endArray()
.endObject()
.startObject()
.startArray("n2")
.startObject()
.field("source", 7)
.endObject()
.endArray()
.endObject()
.endArray()
.endObject();
ParsedDocument doc = mapper.parse("type", "1", jsonDoc.bytes());
assertEquals(6, doc.docs().size());
Document nested = doc.docs().get(0);
assertFieldValue(nested, "n1.n2.target", 7L);
assertFieldValue(nested, "n1.target");
assertFieldValue(nested, "target");
nested = doc.docs().get(2);
assertFieldValue(nested, "n1.n2.target", 5L);
assertFieldValue(nested, "n1.target");
assertFieldValue(nested, "target");
nested = doc.docs().get(3);
assertFieldValue(nested, "n1.n2.target", 3L);
assertFieldValue(nested, "n1.target");
assertFieldValue(nested, "target");
Document parent = doc.docs().get(1);
assertFieldValue(parent, "target");
assertFieldValue(parent, "n1.target", 7L);
assertFieldValue(parent, "n1.n2.target");
parent = doc.docs().get(4);
assertFieldValue(parent, "target");
assertFieldValue(parent, "n1.target", 3L, 5L);
assertFieldValue(parent, "n1.n2.target");
Document root = doc.docs().get(5);
assertFieldValue(root, "target", 3L, 5L, 7L);
assertFieldValue(root, "n1.target");
assertFieldValue(root, "n1.n2.target");
}
}
private void assertFieldValue(Document doc, String field, Number... expected) {
IndexableField[] values = doc.getFields(field);
if (values == null) {
values = new IndexableField[0];
}
Number[] actual = new Number[values.length];
for (int i = 0; i < values.length; ++i) {
actual[i] = values[i].numericValue();
}
assertArrayEquals(expected, actual);
}
}