From f959d39ac3a7dbc54f93764c4b3408d7c6f346d6 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 21 Jan 2016 10:24:35 +0100 Subject: [PATCH] Fix default doc values to be enabled when a field is not indexed. Doc values currently default to `true` if the field is indexed and not analyzed. So setting `index:no` automatically disables doc values, which is not explicit in the documentation. This commit makes doc values default to true for numerics, booleans regardless of whether they are indexed. Not indexed strings still don't have doc values, since we can't know whether it is rather a text or keyword field. This potential source of confusion should go away when we split `string` into `text` and `keyword`. --- .../index/mapper/FieldMapper.java | 19 +++-- .../index/mapper/core/BooleanFieldMapper.java | 4 +- .../index/mapper/core/StringFieldMapper.java | 1 + .../mapper/core/BooleanFieldMapperTests.java | 81 +++++++++++++++++++ .../mapper/numeric/SimpleNumericTests.java | 71 +++++++++++++--- .../string/SimpleStringMappingTests.java | 72 +++++++++++++++++ docs/reference/migration/migrate_3_0.asciidoc | 5 ++ 7 files changed, 231 insertions(+), 22 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index d0d75709767..3725327c235 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -24,6 +24,7 @@ import com.carrotsearch.hppc.cursors.ObjectObjectCursor; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; +import org.elasticsearch.Version; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.lucene.Lucene; @@ -223,6 +224,15 @@ public abstract class FieldMapper extends Mapper implements Cloneable { return context.path().pathAsText(name); } + protected boolean defaultDocValues(Version indexCreated) { + if (indexCreated.onOrAfter(Version.V_3_0_0)) { + // add doc values by default to keyword (boolean, numerics, etc.) fields + return fieldType.tokenized() == false; + } else { + return fieldType.tokenized() == false && fieldType.indexOptions() != IndexOptions.NONE; + } + } + protected void setupFieldType(BuilderContext context) { fieldType.setName(buildFullName(context)); if (fieldType.indexAnalyzer() == null && fieldType.tokenized() == false && fieldType.indexOptions() != IndexOptions.NONE) { @@ -233,14 +243,7 @@ public abstract class FieldMapper extends Mapper implements Cloneable { Settings settings = Settings.builder().put(fieldType.fieldDataType().getSettings()).put(fieldDataSettings).build(); fieldType.setFieldDataType(new FieldDataType(fieldType.fieldDataType().getType(), settings)); } - boolean defaultDocValues = fieldType.tokenized() == false && fieldType.indexOptions() != IndexOptions.NONE; - // backcompat for "fielddata: format: docvalues" for now... - boolean fieldDataDocValues = fieldType.fieldDataType() != null - && FieldDataType.DOC_VALUES_FORMAT_VALUE.equals(fieldType.fieldDataType().getFormat(context.indexSettings())); - if (fieldDataDocValues && docValuesSet && fieldType.hasDocValues() == false) { - // this forces the doc_values setting to be written, so fielddata does not mask the original setting - defaultDocValues = true; - } + boolean defaultDocValues = defaultDocValues(context.indexCreatedVersion()); defaultFieldType.setHasDocValues(defaultDocValues); if (docValuesSet == false) { fieldType.setHasDocValues(defaultDocValues || fieldDataDocValues); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java index 76f8eb34a71..482ff694f7c 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java @@ -225,7 +225,9 @@ public class BooleanFieldMapper extends FieldMapper { if (value == null) { return; } - fields.add(new Field(fieldType().name(), value ? "T" : "F", fieldType())); + if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) { + fields.add(new Field(fieldType().name(), value ? "T" : "F", fieldType())); + } if (fieldType().hasDocValues()) { fields.add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0)); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java index 46b4097c2c0..4471db8954d 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java @@ -31,6 +31,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.fielddata.FieldDataType; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.Mapper; diff --git a/core/src/test/java/org/elasticsearch/index/mapper/core/BooleanFieldMapperTests.java b/core/src/test/java/org/elasticsearch/index/mapper/core/BooleanFieldMapperTests.java index 2175f2ce3e7..d66b1508dc3 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/core/BooleanFieldMapperTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/core/BooleanFieldMapperTests.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReader; @@ -28,8 +29,11 @@ import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; @@ -39,10 +43,15 @@ import org.elasticsearch.index.mapper.DocumentMapperParser; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.mapper.ParseContext.Document; +import org.elasticsearch.index.mapper.string.SimpleStringMappingTests; +import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.test.InternalSettingsPlugin; import org.junit.Before; import java.io.IOException; +import java.util.Collection; public class BooleanFieldMapperTests extends ESSingleNodeTestCase { @@ -55,6 +64,11 @@ public class BooleanFieldMapperTests extends ESSingleNodeTestCase { parser = indexService.mapperService().documentMapperParser(); } + @Override + protected Collection> getPlugins() { + return pluginList(InternalSettingsPlugin.class); + } + public void testDefaults() throws IOException { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field").field("type", "boolean").endObject().endObject() @@ -135,4 +149,71 @@ public class BooleanFieldMapperTests extends ESSingleNodeTestCase { ParsedDocument doc = mapper.parse("test", "type", "1", source); assertNotNull(doc.rootDoc().getField("field.as_string")); } + + public void testDocValues() throws Exception { + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("bool1") + .field("type", "boolean") + .endObject() + .startObject("bool2") + .field("type", "boolean") + .field("index", false) + .endObject() + .startObject("bool3") + .field("type", "boolean") + .field("index", true) + .endObject() + .endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); + + ParsedDocument parsedDoc = defaultMapper.parse("test", "type", "1", XContentFactory.jsonBuilder() + .startObject() + .field("bool1", true) + .field("bool2", true) + .field("bool3", true) + .endObject() + .bytes()); + Document doc = parsedDoc.rootDoc(); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "bool1")); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "bool2")); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "bool3")); + } + + public void testBwCompatDocValues() throws Exception { + Settings oldIndexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_2_0).build(); + indexService = createIndex("test_old", oldIndexSettings); + parser = indexService.mapperService().documentMapperParser(); + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("bool1") + .field("type", "boolean") + .endObject() + .startObject("bool2") + .field("type", "boolean") + .field("index", "no") + .endObject() + .startObject("bool3") + .field("type", "boolean") + .field("index", "not_analyzed") + .endObject() + .endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); + + ParsedDocument parsedDoc = defaultMapper.parse("test", "type", "1", XContentFactory.jsonBuilder() + .startObject() + .field("bool1", true) + .field("bool2", true) + .field("bool3", true) + .endObject() + .bytes()); + Document doc = parsedDoc.rootDoc(); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "bool1")); + assertEquals(DocValuesType.NONE, SimpleStringMappingTests.docValuesType(doc, "bool2")); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "bool3")); + } } diff --git a/core/src/test/java/org/elasticsearch/index/mapper/numeric/SimpleNumericTests.java b/core/src/test/java/org/elasticsearch/index/mapper/numeric/SimpleNumericTests.java index e68817e9ea0..1c024a0d549 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/numeric/SimpleNumericTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/numeric/SimpleNumericTests.java @@ -281,17 +281,19 @@ public class SimpleNumericTests extends ESSingleNodeTestCase { public void testDocValues() throws Exception { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties") - .startObject("int") + .startObject("int1") .field("type", "integer") - .startObject("fielddata") - .field("format", "doc_values") - .endObject() .endObject() - .startObject("double") + .startObject("int2") + .field("type", "integer") + .field("index", false) + .endObject() + .startObject("double1") .field("type", "double") - .startObject("fielddata") - .field("format", "doc_values") - .endObject() + .endObject() + .startObject("double2") + .field("type", "integer") + .field("index", false) .endObject() .endObject() .endObject().endObject().string(); @@ -300,13 +302,56 @@ public class SimpleNumericTests extends ESSingleNodeTestCase { ParsedDocument parsedDoc = defaultMapper.parse("test", "type", "1", XContentFactory.jsonBuilder() .startObject() - .field("int", "1234") - .field("double", "1234") + .field("int1", "1234") + .field("double1", "1234") + .field("int2", "1234") + .field("double2", "1234") .endObject() .bytes()); - final Document doc = parsedDoc.rootDoc(); - assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "int")); - assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "double")); + Document doc = parsedDoc.rootDoc(); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "int1")); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "double1")); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "int2")); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "double2")); + + } + + public void testBwCompatDocValues() throws Exception { + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("int1") + .field("type", "integer") + .endObject() + .startObject("int2") + .field("type", "integer") + .field("index", "no") + .endObject() + .startObject("double1") + .field("type", "double") + .endObject() + .startObject("double2") + .field("type", "integer") + .field("index", "no") + .endObject() + .endObject() + .endObject().endObject().string(); + + Settings oldIndexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_2_0).build(); + DocumentMapper defaultMapper = createIndex("test", oldIndexSettings).mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); + + ParsedDocument parsedDoc = defaultMapper.parse("test", "type", "1", XContentFactory.jsonBuilder() + .startObject() + .field("int1", "1234") + .field("double1", "1234") + .field("int2", "1234") + .field("double2", "1234") + .endObject() + .bytes()); + Document doc = parsedDoc.rootDoc(); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "int1")); + assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "double1")); + assertEquals(DocValuesType.NONE, SimpleStringMappingTests.docValuesType(doc, "int2")); + assertEquals(DocValuesType.NONE, SimpleStringMappingTests.docValuesType(doc, "double2")); } public void testDocValuesOnNested() throws Exception { diff --git a/core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java b/core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java index d32dcad5434..1f3ba975d02 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java @@ -45,11 +45,14 @@ import org.elasticsearch.index.mapper.ParseContext.Document; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.core.StringFieldMapper; import org.elasticsearch.index.mapper.core.StringFieldMapper.Builder; +import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.test.InternalSettingsPlugin; import org.elasticsearch.test.VersionUtils; import org.junit.Before; import java.util.Arrays; +import java.util.Collection; import java.util.Map; import static java.util.Collections.emptyMap; @@ -63,6 +66,11 @@ import static org.hamcrest.Matchers.nullValue; public class SimpleStringMappingTests extends ESSingleNodeTestCase { private static Settings DOC_VALUES_SETTINGS = Settings.builder().put(FieldDataType.FORMAT_KEY, FieldDataType.DOC_VALUES_FORMAT_VALUE).build(); + @Override + protected Collection> getPlugins() { + return pluginList(InternalSettingsPlugin.class); + } + IndexService indexService; DocumentMapperParser parser; @@ -417,6 +425,70 @@ public class SimpleStringMappingTests extends ESSingleNodeTestCase { assertFalse(new Builder("anything").index(false).tokenized(false).docValues(false).build(ctx).fieldType().hasDocValues()); assertTrue(new Builder("anything").index(false).docValues(true).build(ctx).fieldType().hasDocValues()); + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("str1") + .field("type", "string") + .field("index", "no") + .endObject() + .startObject("str2") + .field("type", "string") + .field("index", "not_analyzed") + .endObject() + .startObject("str3") + .field("type", "string") + .field("index", "analyzed") + .endObject() + .startObject("str4") + .field("type", "string") + .field("index", "not_analyzed") + .field("doc_values", false) + .endObject() + .startObject("str5") + .field("type", "string") + .field("index", "no") + .field("doc_values", false) + .endObject() + .endObject() + .endObject().endObject().string(); + + DocumentMapper defaultMapper = parser.parse("type", new CompressedXContent(mapping)); + + ParsedDocument parsedDoc = defaultMapper.parse("test", "type", "1", XContentFactory.jsonBuilder() + .startObject() + .field("str1", "1234") + .field("str2", "1234") + .field("str3", "1234") + .field("str4", "1234") + .field("str5", "1234") + .endObject() + .bytes()); + final Document doc = parsedDoc.rootDoc(); + assertEquals(DocValuesType.NONE, docValuesType(doc, "str1")); + assertEquals(DocValuesType.SORTED_SET, docValuesType(doc, "str2")); + assertEquals(DocValuesType.NONE, docValuesType(doc, "str3")); + assertEquals(DocValuesType.NONE, docValuesType(doc, "str4")); + assertEquals(DocValuesType.NONE, docValuesType(doc, "str5")); + + } + + public void testBwCompatDocValues() throws Exception { + Settings oldIndexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_2_0).build(); + indexService = createIndex("test_old", oldIndexSettings); + parser = indexService.mapperService().documentMapperParser(); + // doc values only work on non-analyzed content + final BuilderContext ctx = new BuilderContext(indexService.getIndexSettings().getSettings(), new ContentPath(1)); + try { + new StringFieldMapper.Builder("anything").docValues(true).build(ctx); + fail(); + } catch (Exception e) { /* OK */ } + + assertFalse(new Builder("anything").index(false).build(ctx).fieldType().hasDocValues()); + assertTrue(new Builder("anything").index(true).tokenized(false).build(ctx).fieldType().hasDocValues()); + assertFalse(new Builder("anything").index(true).tokenized(true).build(ctx).fieldType().hasDocValues()); + assertFalse(new Builder("anything").index(false).tokenized(false).docValues(false).build(ctx).fieldType().hasDocValues()); + assertTrue(new Builder("anything").index(false).docValues(true).build(ctx).fieldType().hasDocValues()); + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties") .startObject("str1") diff --git a/docs/reference/migration/migrate_3_0.asciidoc b/docs/reference/migration/migrate_3_0.asciidoc index 699805ac7e1..9b7de2aff19 100644 --- a/docs/reference/migration/migrate_3_0.asciidoc +++ b/docs/reference/migration/migrate_3_0.asciidoc @@ -257,6 +257,11 @@ amount of heap as long as the total indexing buffer heap used across all shards [[breaking_30_mapping_changes]] === Mapping changes +==== Default doc values settings + +Doc values are now also on by default on numeric and boolean fields that are +not indexed. + ==== Transform removed The `transform` feature from mappings has been removed. It made issues very hard to debug.