nested: In case of a single type, the `_id` field should be added to the nested document instead of the `_uid` field.

When `index.mapping.single_type` is `true` the `_uid` field is not used and the `_id` field is used instead.
Prior to this change, nested documents would in this case still use the `_uid` field to mark which root
document they belong to. When deleting documents this could lead to only the root Lucene document
being deleted and not the nested Lucene documents. This broke the docid block ordering that the block join
relies on in order to work correctly, thus causing the `nested` query, `nested` aggregation, nested sorting
and nested inner hits to either fail or yield incorrect results.

This bug only manifests in the 6.0.0-ALPHA2 release and in snapshots (5.5.0-SNAPSHOT, 5.6.0-SNAPSHOT, 6.0.0-SNAPSHOT).
This commit is contained in:
Martijn van Groningen 2017-06-09 12:48:41 +02:00
parent 87d19b21c7
commit c7ae27d57f
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
4 changed files with 126 additions and 22 deletions

View File

@ -424,15 +424,33 @@ final class DocumentParser {
context = context.createNestedContext(mapper.fullPath());
ParseContext.Document nestedDoc = context.doc();
ParseContext.Document parentDoc = nestedDoc.getParent();
// pre add the uid field if possible (id was already provided)
// We need to add the uid or id to this nested Lucene document too,
// If we do not do this then when a document gets deleted only the root Lucene document gets deleted and
// not the nested Lucene documents! Besides the fact that we would have zombie Lucene documents, the ordering of
// documents inside the Lucene index (document blocks) will be incorrect, as nested documents of different root
// documents are then aligned with other root documents. This will lead to the nested query, sorting, aggregations
// and inner hits to fail or yield incorrect results.
if (context.mapperService().getIndexSettings().isSingleType()) {
IndexableField idField = parentDoc.getField(IdFieldMapper.NAME);
if (idField != null) {
// We just need to store the id as indexed field, so that IndexWriter#deleteDocuments(term) can then
// delete it when the root document is deleted too.
nestedDoc.add(new Field(IdFieldMapper.NAME, idField.stringValue(), IdFieldMapper.Defaults.NESTED_FIELD_TYPE));
} else {
throw new IllegalStateException("The root document of a nested document should have an id field");
}
} else {
IndexableField uidField = parentDoc.getField(UidFieldMapper.NAME);
if (uidField != null) {
// we don't need to add it as a full uid field in nested docs, since we don't need versioning
// we also rely on this for UidField#loadVersion
// this is a deeply nested field
// We just need to store the uid as indexed field, so that IndexWriter#deleteDocuments(term) can then
// delete it when the root document is deleted too.
nestedDoc.add(new Field(UidFieldMapper.NAME, uidField.stringValue(), UidFieldMapper.Defaults.NESTED_FIELD_TYPE));
} else {
throw new IllegalStateException("The root document of a nested document should have an uid field");
}
}
// the type of the nested doc starts with __, so we can identify that its a nested one in filters
// note, we don't prefix it with the type of the doc since it allows us to execute a nested query
// across types (for example, with similar nested objects)

View File

@ -52,6 +52,7 @@ public class IdFieldMapper extends MetadataFieldMapper {
public static final String NAME = IdFieldMapper.NAME;
public static final MappedFieldType FIELD_TYPE = new IdFieldType();
public static final MappedFieldType NESTED_FIELD_TYPE;
static {
FIELD_TYPE.setTokenized(false);
@ -62,6 +63,10 @@ public class IdFieldMapper extends MetadataFieldMapper {
FIELD_TYPE.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
FIELD_TYPE.setName(NAME);
FIELD_TYPE.freeze();
NESTED_FIELD_TYPE = FIELD_TYPE.clone();
NESTED_FIELD_TYPE.setStored(false);
NESTED_FIELD_TYPE.freeze();
}
}

View File

@ -25,6 +25,7 @@ import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
@ -148,6 +149,92 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
e.getMessage());
}
/**
 * Parses the same document (one nested object under {@code foo}, one keyword field {@code baz})
 * against two indices that differ only in {@code index.mapping.single_type}, and checks which
 * identity field ends up on the nested and root Lucene documents: the uid field when multiple
 * types are allowed, the id field when only a single type is allowed.
 */
public void testNestedHaveIdAndTypeFields() throws Exception {
    // index1 allows multiple types, index2 enforces a single type.
    Settings multiTypeSettings = Settings.builder().put("index.mapping.single_type", false).build();
    DocumentMapperParser multiTypeParser = createIndex("index1", multiTypeSettings).mapperService().documentMapperParser();
    Settings singleTypeSettings = Settings.builder().put("index.mapping.single_type", true).build();
    DocumentMapperParser singleTypeParser = createIndex("index2", singleTypeSettings).mapperService().documentMapperParser();

    // Mapping shared by both indices: a nested object "foo" with keyword "bar", plus a top-level keyword "baz".
    XContentBuilder mapping = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type")
                .startObject("properties")
                    .startObject("foo")
                        .field("type", "nested")
                        .startObject("properties")
                            .startObject("bar")
                                .field("type", "keyword")
                            .endObject()
                        .endObject()
                    .endObject()
                    .startObject("baz")
                        .field("type", "keyword")
                    .endObject()
                .endObject()
            .endObject()
        .endObject();
    String mappingJson = mapping.string();
    DocumentMapper multiTypeMapper = multiTypeParser.parse("type", new CompressedXContent(mappingJson));
    DocumentMapper singleTypeMapper = singleTypeParser.parse("type", new CompressedXContent(mappingJson));

    // Source document: a single nested entry under "foo" and one root-level value.
    XContentBuilder doc = XContentFactory.jsonBuilder()
        .startObject()
            .startArray("foo")
                .startObject()
                    .field("bar", "value1")
                .endObject()
            .endArray()
            .field("baz", "value2")
        .endObject();

    // Multiple types allowed: both the nested and the root document carry the uid field and no id field.
    ParsedDocument multiTypeResult = multiTypeMapper.parse(SourceToParse.source("index1", "type", "1", doc.bytes(), XContentType.JSON));
    assertEquals(2, multiTypeResult.docs().size());
    // Nested document (index 0):
    assertNull(multiTypeResult.docs().get(0).getField(IdFieldMapper.NAME));
    assertNotNull(multiTypeResult.docs().get(0).getField(UidFieldMapper.NAME));
    assertEquals("type#1", multiTypeResult.docs().get(0).getField(UidFieldMapper.NAME).stringValue());
    assertEquals(UidFieldMapper.Defaults.NESTED_FIELD_TYPE, multiTypeResult.docs().get(0).getField(UidFieldMapper.NAME).fieldType());
    assertNotNull(multiTypeResult.docs().get(0).getField(TypeFieldMapper.NAME));
    assertEquals("__foo", multiTypeResult.docs().get(0).getField(TypeFieldMapper.NAME).stringValue());
    assertEquals("value1", multiTypeResult.docs().get(0).getField("foo.bar").binaryValue().utf8ToString());
    // Root document (index 1):
    assertNull(multiTypeResult.docs().get(1).getField(IdFieldMapper.NAME));
    assertNotNull(multiTypeResult.docs().get(1).getField(UidFieldMapper.NAME));
    assertEquals("type#1", multiTypeResult.docs().get(1).getField(UidFieldMapper.NAME).stringValue());
    assertEquals(UidFieldMapper.Defaults.FIELD_TYPE, multiTypeResult.docs().get(1).getField(UidFieldMapper.NAME).fieldType());
    assertNotNull(multiTypeResult.docs().get(1).getField(TypeFieldMapper.NAME));
    assertEquals("type", multiTypeResult.docs().get(1).getField(TypeFieldMapper.NAME).stringValue());
    assertEquals("value2", multiTypeResult.docs().get(1).getField("baz").binaryValue().utf8ToString());

    // Single type only: both documents carry the id field and no uid field.
    ParsedDocument singleTypeResult = singleTypeMapper.parse(SourceToParse.source("index2", "type", "1", doc.bytes(), XContentType.JSON));
    assertEquals(2, singleTypeResult.docs().size());
    // Nested document (index 0):
    assertNull(singleTypeResult.docs().get(0).getField(UidFieldMapper.NAME));
    assertNotNull(singleTypeResult.docs().get(0).getField(IdFieldMapper.NAME));
    assertEquals("1", singleTypeResult.docs().get(0).getField(IdFieldMapper.NAME).stringValue());
    assertEquals(IdFieldMapper.Defaults.NESTED_FIELD_TYPE, singleTypeResult.docs().get(0).getField(IdFieldMapper.NAME).fieldType());
    assertNotNull(singleTypeResult.docs().get(0).getField(TypeFieldMapper.NAME));
    assertEquals("__foo", singleTypeResult.docs().get(0).getField(TypeFieldMapper.NAME).stringValue());
    assertEquals("value1", singleTypeResult.docs().get(0).getField("foo.bar").binaryValue().utf8ToString());
    // Root document (index 1); here the test expects no type field on the root document.
    assertNull(singleTypeResult.docs().get(1).getField(UidFieldMapper.NAME));
    assertNotNull(singleTypeResult.docs().get(1).getField(IdFieldMapper.NAME));
    assertEquals("1", singleTypeResult.docs().get(1).getField(IdFieldMapper.NAME).stringValue());
    assertEquals(IdFieldMapper.Defaults.FIELD_TYPE, singleTypeResult.docs().get(1).getField(IdFieldMapper.NAME).fieldType());
    assertNull(singleTypeResult.docs().get(1).getField(TypeFieldMapper.NAME));
    assertEquals("value2", singleTypeResult.docs().get(1).getField("baz").binaryValue().utf8ToString());
}
public void testPropagateDynamicWithExistingMapper() throws Exception {
DocumentMapperParser mapperParser = createIndex("test").mapperService().documentMapperParser();
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")

View File

@ -57,9 +57,7 @@ import static org.hamcrest.Matchers.startsWith;
public class SimpleNestedIT extends ESIntegTestCase {
public void testSimpleNested() throws Exception {
assertAcked(prepareCreate("test")
.setSettings("index.mapping.single_type", false)
.addMapping("type1", "nested1", "type=nested")
.addMapping("type2", "nested1", "type=nested"));
.addMapping("type1", "nested1", "type=nested"));
ensureGreen();
// check on no data, see it works
@ -158,10 +156,6 @@ public class SimpleNestedIT extends ESIntegTestCase {
searchResponse = client().prepareSearch("test").setQuery(nestedQuery("nested1", termQuery("nested1.n_field1", "n_value1_1"), ScoreMode.Avg)).execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1L));
searchResponse = client().prepareSearch("test").setTypes("type1", "type2").setQuery(nestedQuery("nested1", termQuery("nested1.n_field1", "n_value1_1"), ScoreMode.Avg)).execute().actionGet();
assertNoFailures(searchResponse);
assertThat(searchResponse.getHits().getTotalHits(), equalTo(1L));
}
public void testMultiNested() throws Exception {