Fix default doc values to be enabled when a field is not indexed.

Doc values currently default to `true` if the field is indexed and not analyzed.
So setting `index:no` automatically disables doc values, which is not explicit
in the documentation.

This commit makes doc values default to true for numerics, booleans regardless
of whether they are indexed. Not indexed strings still don't have doc values,
since we can't know whether it is rather a text or keyword field. This
potential source of confusion should go away when we split `string` into `text`
and `keyword`.
This commit is contained in:
Adrien Grand 2016-01-21 10:24:35 +01:00
parent 4bf1e31443
commit f959d39ac3
7 changed files with 231 additions and 22 deletions

View File

@ -24,6 +24,7 @@ import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexOptions;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.Lucene;
@ -223,6 +224,15 @@ public abstract class FieldMapper extends Mapper implements Cloneable {
return context.path().pathAsText(name); return context.path().pathAsText(name);
} }
protected boolean defaultDocValues(Version indexCreated) {
if (indexCreated.onOrAfter(Version.V_3_0_0)) {
// add doc values by default to keyword (boolean, numerics, etc.) fields
return fieldType.tokenized() == false;
} else {
return fieldType.tokenized() == false && fieldType.indexOptions() != IndexOptions.NONE;
}
}
protected void setupFieldType(BuilderContext context) { protected void setupFieldType(BuilderContext context) {
fieldType.setName(buildFullName(context)); fieldType.setName(buildFullName(context));
if (fieldType.indexAnalyzer() == null && fieldType.tokenized() == false && fieldType.indexOptions() != IndexOptions.NONE) { if (fieldType.indexAnalyzer() == null && fieldType.tokenized() == false && fieldType.indexOptions() != IndexOptions.NONE) {
@ -233,14 +243,7 @@ public abstract class FieldMapper extends Mapper implements Cloneable {
Settings settings = Settings.builder().put(fieldType.fieldDataType().getSettings()).put(fieldDataSettings).build(); Settings settings = Settings.builder().put(fieldType.fieldDataType().getSettings()).put(fieldDataSettings).build();
fieldType.setFieldDataType(new FieldDataType(fieldType.fieldDataType().getType(), settings)); fieldType.setFieldDataType(new FieldDataType(fieldType.fieldDataType().getType(), settings));
} }
boolean defaultDocValues = fieldType.tokenized() == false && fieldType.indexOptions() != IndexOptions.NONE; boolean defaultDocValues = defaultDocValues(context.indexCreatedVersion());
// backcompat for "fielddata: format: docvalues" for now...
boolean fieldDataDocValues = fieldType.fieldDataType() != null
&& FieldDataType.DOC_VALUES_FORMAT_VALUE.equals(fieldType.fieldDataType().getFormat(context.indexSettings()));
if (fieldDataDocValues && docValuesSet && fieldType.hasDocValues() == false) {
// this forces the doc_values setting to be written, so fielddata does not mask the original setting
defaultDocValues = true;
}
defaultFieldType.setHasDocValues(defaultDocValues); defaultFieldType.setHasDocValues(defaultDocValues);
if (docValuesSet == false) { if (docValuesSet == false) {
fieldType.setHasDocValues(defaultDocValues || fieldDataDocValues); fieldType.setHasDocValues(defaultDocValues || fieldDataDocValues);

View File

@ -225,7 +225,9 @@ public class BooleanFieldMapper extends FieldMapper {
if (value == null) { if (value == null) {
return; return;
} }
fields.add(new Field(fieldType().name(), value ? "T" : "F", fieldType())); if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
fields.add(new Field(fieldType().name(), value ? "T" : "F", fieldType()));
}
if (fieldType().hasDocValues()) { if (fieldType().hasDocValues()) {
fields.add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0)); fields.add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0));
} }

View File

@ -31,6 +31,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.Mapper;

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.core;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
@ -28,8 +29,11 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentFactory;
@ -39,10 +43,15 @@ import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.ParseContext.Document;
import org.elasticsearch.index.mapper.string.SimpleStringMappingTests;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
import org.junit.Before; import org.junit.Before;
import java.io.IOException; import java.io.IOException;
import java.util.Collection;
public class BooleanFieldMapperTests extends ESSingleNodeTestCase { public class BooleanFieldMapperTests extends ESSingleNodeTestCase {
@ -55,6 +64,11 @@ public class BooleanFieldMapperTests extends ESSingleNodeTestCase {
parser = indexService.mapperService().documentMapperParser(); parser = indexService.mapperService().documentMapperParser();
} }
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
return pluginList(InternalSettingsPlugin.class);
}
public void testDefaults() throws IOException { public void testDefaults() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "boolean").endObject().endObject() .startObject("properties").startObject("field").field("type", "boolean").endObject().endObject()
@ -135,4 +149,71 @@ public class BooleanFieldMapperTests extends ESSingleNodeTestCase {
ParsedDocument doc = mapper.parse("test", "type", "1", source); ParsedDocument doc = mapper.parse("test", "type", "1", source);
assertNotNull(doc.rootDoc().getField("field.as_string")); assertNotNull(doc.rootDoc().getField("field.as_string"));
} }
public void testDocValues() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("bool1")
.field("type", "boolean")
.endObject()
.startObject("bool2")
.field("type", "boolean")
.field("index", false)
.endObject()
.startObject("bool3")
.field("type", "boolean")
.field("index", true)
.endObject()
.endObject()
.endObject().endObject().string();
DocumentMapper defaultMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
ParsedDocument parsedDoc = defaultMapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.startObject()
.field("bool1", true)
.field("bool2", true)
.field("bool3", true)
.endObject()
.bytes());
Document doc = parsedDoc.rootDoc();
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "bool1"));
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "bool2"));
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "bool3"));
}
public void testBwCompatDocValues() throws Exception {
Settings oldIndexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_2_0).build();
indexService = createIndex("test_old", oldIndexSettings);
parser = indexService.mapperService().documentMapperParser();
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("bool1")
.field("type", "boolean")
.endObject()
.startObject("bool2")
.field("type", "boolean")
.field("index", "no")
.endObject()
.startObject("bool3")
.field("type", "boolean")
.field("index", "not_analyzed")
.endObject()
.endObject()
.endObject().endObject().string();
DocumentMapper defaultMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
ParsedDocument parsedDoc = defaultMapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.startObject()
.field("bool1", true)
.field("bool2", true)
.field("bool3", true)
.endObject()
.bytes());
Document doc = parsedDoc.rootDoc();
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "bool1"));
assertEquals(DocValuesType.NONE, SimpleStringMappingTests.docValuesType(doc, "bool2"));
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "bool3"));
}
} }

View File

@ -281,17 +281,19 @@ public class SimpleNumericTests extends ESSingleNodeTestCase {
public void testDocValues() throws Exception { public void testDocValues() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties") .startObject("properties")
.startObject("int") .startObject("int1")
.field("type", "integer") .field("type", "integer")
.startObject("fielddata")
.field("format", "doc_values")
.endObject()
.endObject() .endObject()
.startObject("double") .startObject("int2")
.field("type", "integer")
.field("index", false)
.endObject()
.startObject("double1")
.field("type", "double") .field("type", "double")
.startObject("fielddata") .endObject()
.field("format", "doc_values") .startObject("double2")
.endObject() .field("type", "integer")
.field("index", false)
.endObject() .endObject()
.endObject() .endObject()
.endObject().endObject().string(); .endObject().endObject().string();
@ -300,13 +302,56 @@ public class SimpleNumericTests extends ESSingleNodeTestCase {
ParsedDocument parsedDoc = defaultMapper.parse("test", "type", "1", XContentFactory.jsonBuilder() ParsedDocument parsedDoc = defaultMapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.startObject() .startObject()
.field("int", "1234") .field("int1", "1234")
.field("double", "1234") .field("double1", "1234")
.field("int2", "1234")
.field("double2", "1234")
.endObject() .endObject()
.bytes()); .bytes());
final Document doc = parsedDoc.rootDoc(); Document doc = parsedDoc.rootDoc();
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "int")); assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "int1"));
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "double")); assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "double1"));
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "int2"));
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "double2"));
}
public void testBwCompatDocValues() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("int1")
.field("type", "integer")
.endObject()
.startObject("int2")
.field("type", "integer")
.field("index", "no")
.endObject()
.startObject("double1")
.field("type", "double")
.endObject()
.startObject("double2")
.field("type", "integer")
.field("index", "no")
.endObject()
.endObject()
.endObject().endObject().string();
Settings oldIndexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_2_0).build();
DocumentMapper defaultMapper = createIndex("test", oldIndexSettings).mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
ParsedDocument parsedDoc = defaultMapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.startObject()
.field("int1", "1234")
.field("double1", "1234")
.field("int2", "1234")
.field("double2", "1234")
.endObject()
.bytes());
Document doc = parsedDoc.rootDoc();
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "int1"));
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "double1"));
assertEquals(DocValuesType.NONE, SimpleStringMappingTests.docValuesType(doc, "int2"));
assertEquals(DocValuesType.NONE, SimpleStringMappingTests.docValuesType(doc, "double2"));
} }
public void testDocValuesOnNested() throws Exception { public void testDocValuesOnNested() throws Exception {

View File

@ -45,11 +45,14 @@ import org.elasticsearch.index.mapper.ParseContext.Document;
import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.core.StringFieldMapper; import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper.Builder; import org.elasticsearch.index.mapper.core.StringFieldMapper.Builder;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
import org.elasticsearch.test.VersionUtils; import org.elasticsearch.test.VersionUtils;
import org.junit.Before; import org.junit.Before;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection;
import java.util.Map; import java.util.Map;
import static java.util.Collections.emptyMap; import static java.util.Collections.emptyMap;
@ -63,6 +66,11 @@ import static org.hamcrest.Matchers.nullValue;
public class SimpleStringMappingTests extends ESSingleNodeTestCase { public class SimpleStringMappingTests extends ESSingleNodeTestCase {
private static Settings DOC_VALUES_SETTINGS = Settings.builder().put(FieldDataType.FORMAT_KEY, FieldDataType.DOC_VALUES_FORMAT_VALUE).build(); private static Settings DOC_VALUES_SETTINGS = Settings.builder().put(FieldDataType.FORMAT_KEY, FieldDataType.DOC_VALUES_FORMAT_VALUE).build();
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
return pluginList(InternalSettingsPlugin.class);
}
IndexService indexService; IndexService indexService;
DocumentMapperParser parser; DocumentMapperParser parser;
@ -417,6 +425,70 @@ public class SimpleStringMappingTests extends ESSingleNodeTestCase {
assertFalse(new Builder("anything").index(false).tokenized(false).docValues(false).build(ctx).fieldType().hasDocValues()); assertFalse(new Builder("anything").index(false).tokenized(false).docValues(false).build(ctx).fieldType().hasDocValues());
assertTrue(new Builder("anything").index(false).docValues(true).build(ctx).fieldType().hasDocValues()); assertTrue(new Builder("anything").index(false).docValues(true).build(ctx).fieldType().hasDocValues());
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("str1")
.field("type", "string")
.field("index", "no")
.endObject()
.startObject("str2")
.field("type", "string")
.field("index", "not_analyzed")
.endObject()
.startObject("str3")
.field("type", "string")
.field("index", "analyzed")
.endObject()
.startObject("str4")
.field("type", "string")
.field("index", "not_analyzed")
.field("doc_values", false)
.endObject()
.startObject("str5")
.field("type", "string")
.field("index", "no")
.field("doc_values", false)
.endObject()
.endObject()
.endObject().endObject().string();
DocumentMapper defaultMapper = parser.parse("type", new CompressedXContent(mapping));
ParsedDocument parsedDoc = defaultMapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.startObject()
.field("str1", "1234")
.field("str2", "1234")
.field("str3", "1234")
.field("str4", "1234")
.field("str5", "1234")
.endObject()
.bytes());
final Document doc = parsedDoc.rootDoc();
assertEquals(DocValuesType.NONE, docValuesType(doc, "str1"));
assertEquals(DocValuesType.SORTED_SET, docValuesType(doc, "str2"));
assertEquals(DocValuesType.NONE, docValuesType(doc, "str3"));
assertEquals(DocValuesType.NONE, docValuesType(doc, "str4"));
assertEquals(DocValuesType.NONE, docValuesType(doc, "str5"));
}
public void testBwCompatDocValues() throws Exception {
Settings oldIndexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_2_2_0).build();
indexService = createIndex("test_old", oldIndexSettings);
parser = indexService.mapperService().documentMapperParser();
// doc values only work on non-analyzed content
final BuilderContext ctx = new BuilderContext(indexService.getIndexSettings().getSettings(), new ContentPath(1));
try {
new StringFieldMapper.Builder("anything").docValues(true).build(ctx);
fail();
} catch (Exception e) { /* OK */ }
assertFalse(new Builder("anything").index(false).build(ctx).fieldType().hasDocValues());
assertTrue(new Builder("anything").index(true).tokenized(false).build(ctx).fieldType().hasDocValues());
assertFalse(new Builder("anything").index(true).tokenized(true).build(ctx).fieldType().hasDocValues());
assertFalse(new Builder("anything").index(false).tokenized(false).docValues(false).build(ctx).fieldType().hasDocValues());
assertTrue(new Builder("anything").index(false).docValues(true).build(ctx).fieldType().hasDocValues());
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties") .startObject("properties")
.startObject("str1") .startObject("str1")

View File

@ -257,6 +257,11 @@ amount of heap as long as the total indexing buffer heap used across all shards
[[breaking_30_mapping_changes]] [[breaking_30_mapping_changes]]
=== Mapping changes === Mapping changes
==== Default doc values settings
Doc values are now also on by default on numeric and boolean fields that are
not indexed.
==== Transform removed ==== Transform removed
The `transform` feature from mappings has been removed. It made issues very hard to debug. The `transform` feature from mappings has been removed. It made issues very hard to debug.