Mappings: Lock down _id field

There are two implications to this change.
First, percolator now uses _uid internally, extracting the id portion
when needed. Second, sorting on _id is no longer possible, since you
can no longer index _id. However, _uid can still be used to sort, and
is better anyways as indexing _id just to make it available to
fielddata for sorting is wasteful.

see #8143
closes #9842
This commit is contained in:
Ryan Ernst 2015-02-24 01:20:13 -08:00
parent c54bd2f7ad
commit b96bd201c1
13 changed files with 63 additions and 50 deletions

View File

@ -237,6 +237,12 @@ curl -XGET 'localhost:9200/index/type/_search'
}
---------------
==== Meta fields have limited confiugration
Meta fields (those beginning with underscore) are fields used by elasticsearch
to provide special features. They now have limited configuration options.
* `_id` configuration can no longer be changed. If you need to sort, use `_uid` instead.
=== Codecs
It is no longer possible to specify per-field postings and doc values formats

View File

@ -196,7 +196,6 @@ public class MapperService extends AbstractIndexComponent {
defaultPercolatorMappingSource = "{\n" +
//" \"" + PercolatorService.TYPE_NAME + "\":{\n" +
" \"" + "_default_" + "\":{\n" +
" \"_id\" : {\"index\": \"not_analyzed\"}," +
" \"properties\" : {\n" +
" \"query\" : {\n" +
" \"type\" : \"object\",\n" +

View File

@ -121,13 +121,16 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
public static class TypeParser implements Mapper.TypeParser {
@Override
public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
if (parserContext.indexVersionCreated().onOrAfter(Version.V_2_0_0)) {
throw new MapperParsingException(NAME + " is not configurable");
}
IdFieldMapper.Builder builder = id();
parseField(builder, builder.name, node, parserContext);
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
Map.Entry<String, Object> entry = iterator.next();
String fieldName = Strings.toUnderscoreCase(entry.getKey());
Object fieldNode = entry.getValue();
if (fieldName.equals("path") && parserContext.indexVersionCreated().before(Version.V_2_0_0)) {
if (fieldName.equals("path")) {
builder.path(fieldNode.toString());
iterator.remove();
}
@ -151,7 +154,8 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
private static FieldType idFieldType(Settings indexSettings) {
FieldType fieldType = new FieldType(Defaults.FIELD_TYPE);
if (indexSettings.getAsBoolean("index.mapping._id.indexed", true) == false) {
boolean pre2x = Version.indexCreated(indexSettings).before(Version.V_2_0_0);
if (pre2x && indexSettings.getAsBoolean("index.mapping._id.indexed", true) == false) {
fieldType.setTokenized(false);
}
return fieldType;
@ -345,6 +349,9 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
if (writePre2xSettings == false) {
return builder;
}
boolean includeDefaults = params.paramAsBoolean("include_defaults", false);
// if all are defaults, no sense to write it at all
@ -361,7 +368,7 @@ public class IdFieldMapper extends AbstractFieldMapper<String> implements Intern
if (includeDefaults || fieldType.indexOptions() != Defaults.FIELD_TYPE.indexOptions()) {
builder.field("index", indexTokenizeOptionToString(fieldType.indexOptions() != IndexOptions.NONE, fieldType.tokenized()));
}
if (writePre2xSettings && (includeDefaults || path != Defaults.PATH)) {
if (includeDefaults || path != Defaults.PATH) {
builder.field("path", path);
}

View File

@ -32,7 +32,8 @@ import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.fieldvisitor.JustSourceFieldsVisitor;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.internal.IdFieldMapper;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import java.io.IOException;
import java.util.Map;
@ -44,17 +45,17 @@ final class QueriesLoaderCollector extends SimpleCollector {
private final Map<BytesRef, Query> queries = Maps.newHashMap();
private final JustSourceFieldsVisitor fieldsVisitor = new JustSourceFieldsVisitor();
private final PercolatorQueriesRegistry percolator;
private final IndexFieldData<?> idFieldData;
private final IndexFieldData<?> uidFieldData;
private final ESLogger logger;
private SortedBinaryDocValues idValues;
private SortedBinaryDocValues uidValues;
private LeafReader reader;
QueriesLoaderCollector(PercolatorQueriesRegistry percolator, ESLogger logger, MapperService mapperService, IndexFieldDataService indexFieldDataService) {
this.percolator = percolator;
this.logger = logger;
final FieldMapper<?> idMapper = mapperService.smartNameFieldMapper(IdFieldMapper.NAME);
this.idFieldData = indexFieldDataService.getForField(idMapper);
final FieldMapper<?> uidMapper = mapperService.smartNameFieldMapper(UidFieldMapper.NAME);
this.uidFieldData = indexFieldDataService.getForField(uidMapper);
}
public Map<BytesRef, Query> queries() {
@ -65,10 +66,11 @@ final class QueriesLoaderCollector extends SimpleCollector {
public void collect(int doc) throws IOException {
// the _source is the query
idValues.setDocument(doc);
if (idValues.count() > 0) {
assert idValues.count() == 1;
BytesRef id = idValues.valueAt(0);
uidValues.setDocument(doc);
if (uidValues.count() > 0) {
assert uidValues.count() == 1;
final BytesRef uid = uidValues.valueAt(0);
final BytesRef id = Uid.splitUidIntoTypeAndId(uid)[1];
fieldsVisitor.reset();
reader.document(doc, fieldsVisitor);
@ -90,7 +92,7 @@ final class QueriesLoaderCollector extends SimpleCollector {
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
reader = context.reader();
idValues = idFieldData.load(context).getBytesValues();
uidValues = uidFieldData.load(context).getBytesValues();
}
@Override

View File

@ -19,7 +19,6 @@
package org.elasticsearch.percolator;
import com.carrotsearch.hppc.ByteObjectOpenHashMap;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.memory.ExtendedMemoryIndex;
@ -68,7 +67,9 @@ import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.IdFieldMapper;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.percolator.stats.ShardPercolateService;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.index.search.nested.NonNestedDocsFilter;
@ -99,8 +100,6 @@ import static org.elasticsearch.percolator.QueryCollector.count;
import static org.elasticsearch.percolator.QueryCollector.match;
import static org.elasticsearch.percolator.QueryCollector.matchAndScore;
/**
*/
public class PercolatorService extends AbstractComponent {
public final static float NO_SCORE = Float.NEGATIVE_INFINITY;
@ -741,18 +740,18 @@ public class PercolatorService extends AbstractComponent {
hls = new ArrayList<>(topDocs.scoreDocs.length);
}
final FieldMapper<?> idMapper = context.mapperService().smartNameFieldMapper(IdFieldMapper.NAME);
final IndexFieldData<?> idFieldData = context.fieldData().getForField(idMapper);
final FieldMapper<?> uidMapper = context.mapperService().smartNameFieldMapper(UidFieldMapper.NAME);
final IndexFieldData<?> uidFieldData = context.fieldData().getForField(uidMapper);
int i = 0;
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
int segmentIdx = ReaderUtil.subIndex(scoreDoc.doc, percolatorSearcher.reader().leaves());
LeafReaderContext atomicReaderContext = percolatorSearcher.reader().leaves().get(segmentIdx);
SortedBinaryDocValues values = idFieldData.load(atomicReaderContext).getBytesValues();
SortedBinaryDocValues values = uidFieldData.load(atomicReaderContext).getBytesValues();
final int localDocId = scoreDoc.doc - atomicReaderContext.docBase;
values.setDocument(localDocId);
final int numValues = values.count();
assert numValues == 1;
BytesRef bytes = values.valueAt(0);
BytesRef bytes = Uid.splitUidIntoTypeAndId(values.valueAt(0))[1];
matches.add(BytesRef.deepCopyOf(bytes));
if (hls != null) {
Query query = context.percolateQueries().get(bytes);

View File

@ -19,7 +19,6 @@
package org.elasticsearch.percolator;
import com.carrotsearch.hppc.FloatArrayList;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
@ -34,7 +33,8 @@ import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.internal.IdFieldMapper;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.index.search.nested.NonNestedDocsFilter;
import org.elasticsearch.search.aggregations.Aggregator;
@ -54,7 +54,7 @@ import java.util.concurrent.ConcurrentMap;
*/
abstract class QueryCollector extends SimpleCollector {
final IndexFieldData<?> idFieldData;
final IndexFieldData<?> uidFieldData;
final IndexSearcher searcher;
final ConcurrentMap<BytesRef, Query> queries;
final ESLogger logger;
@ -72,8 +72,8 @@ abstract class QueryCollector extends SimpleCollector {
this.logger = logger;
this.queries = context.percolateQueries();
this.searcher = context.docSearcher();
final FieldMapper<?> idMapper = context.mapperService().smartNameFieldMapper(IdFieldMapper.NAME);
this.idFieldData = context.fieldData().getForField(idMapper);
final FieldMapper<?> uidMapper = context.mapperService().smartNameFieldMapper(UidFieldMapper.NAME);
this.uidFieldData = context.fieldData().getForField(uidMapper);
this.isNestedDoc = isNestedDoc;
List<Aggregator> aggregatorCollectors = new ArrayList<>();
@ -111,7 +111,7 @@ abstract class QueryCollector extends SimpleCollector {
@Override
public void doSetNextReader(LeafReaderContext context) throws IOException {
// we use the UID because id might not be indexed
values = idFieldData.load(context).getBytesValues();
values = uidFieldData.load(context).getBytesValues();
aggregatorLeafCollector = aggregatorCollector.getLeafCollector(context);
}
@ -139,7 +139,7 @@ abstract class QueryCollector extends SimpleCollector {
return null;
}
assert numValues == 1;
current = values.valueAt(0);
current = Uid.splitUidIntoTypeAndId(values.valueAt(0))[1];
return queries.get(current);
}

View File

@ -20,9 +20,13 @@
package org.elasticsearch.count.query;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.action.ShardOperationFailedException;
import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.*;
import org.elasticsearch.index.query.CommonTermsQueryBuilder.Operator;
@ -256,10 +260,11 @@ public class CountQueryTests extends ElasticsearchIntegrationTest {
}
private void idsFilterTests(String index) throws Exception {
assertAcked(prepareCreate("test")
.addMapping("type1", jsonBuilder().startObject().startObject("type1")
.startObject("_id").field("index", index).endObject()
.endObject().endObject()));
Settings indexSettings = ImmutableSettings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_1_4_2.id).build();
assertAcked(prepareCreate("test").setSettings(indexSettings)
.addMapping("type1", jsonBuilder().startObject().startObject("type1")
.startObject("_id").field("index", index).endObject()
.endObject().endObject()));
indexRandom(true, client().prepareIndex("test", "type1", "1").setSource("field1", "value1"),
client().prepareIndex("test", "type1", "2").setSource("field1", "value2"),

View File

@ -71,11 +71,12 @@ public class IdMappingTests extends ElasticsearchSingleNodeTest {
assertThat(doc.rootDoc().get(IdFieldMapper.NAME), nullValue());
}
public void testIdIndexed() throws Exception {
public void testIdIndexedBackcompat() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_id").field("index", "not_analyzed").endObject()
.endObject().endObject().string();
DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse(mapping);
Settings indexSettings = ImmutableSettings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_1_4_2.id).build();
DocumentMapper docMapper = createIndex("test", indexSettings).mapperService().documentMapperParser().parse(mapping);
ParsedDocument doc = docMapper.parse("type", "1", XContentFactory.jsonBuilder()
.startObject()
@ -95,7 +96,7 @@ public class IdMappingTests extends ElasticsearchSingleNodeTest {
assertThat(doc.rootDoc().get(IdFieldMapper.NAME), notNullValue());
}
public void testIdPath() throws Exception {
public void testIdPathBackcompat() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_id").field("path", "my_path").endObject()
.endObject().endObject().string();

View File

@ -6,8 +6,6 @@
date_formats:["yyyy-MM-dd", "dd-MM-yyyy"],
dynamic:false,
enabled:true,
_id:{
},
_source:{
},
_type:{

View File

@ -55,8 +55,8 @@ public class ChildrenTests extends ElasticsearchIntegrationTest {
public void setupSuiteScopeCluster() throws Exception {
assertAcked(
prepareCreate("test")
.addMapping("article", "_id", "index=not_analyzed")
.addMapping("comment", "_parent", "type=article", "_id", "index=not_analyzed")
.addMapping("article")
.addMapping("comment", "_parent", "type=article")
);
List<IndexRequestBuilder> requests = new ArrayList<>();
@ -70,6 +70,7 @@ public class ChildrenTests extends ElasticsearchIntegrationTest {
for (int i = 0; i < numParentDocs; i++) {
String id = Integer.toString(i);
// TODO: this array is always of length 1, and testChildrenAggs fails if this is changed
String[] categories = new String[randomIntBetween(1,1)];
for (int j = 0; j < categories.length; j++) {
String category = categories[j] = uniqueCategories[catIndex++ % uniqueCategories.length];
@ -165,7 +166,7 @@ public class ChildrenTests extends ElasticsearchIntegrationTest {
.setQuery(matchQuery("randomized", false))
.addAggregation(
terms("category").field("category").size(0).subAggregation(
children("to_comment").childType("comment").subAggregation(topHits("top_comments").addSort("_id", SortOrder.ASC))
children("to_comment").childType("comment").subAggregation(topHits("top_comments").addSort("_uid", SortOrder.ASC))
)
).get();
assertSearchResponse(searchResponse);
@ -192,10 +193,8 @@ public class ChildrenTests extends ElasticsearchIntegrationTest {
assertThat(childrenBucket.getDocCount(), equalTo(2l));
TopHits topHits = childrenBucket.getAggregations().get("top_comments");
assertThat(topHits.getHits().totalHits(), equalTo(2l));
assertThat(topHits.getHits().getAt(0).sortValues()[0].toString(), equalTo("a"));
assertThat(topHits.getHits().getAt(0).getId(), equalTo("a"));
assertThat(topHits.getHits().getAt(0).getType(), equalTo("comment"));
assertThat(topHits.getHits().getAt(1).sortValues()[0].toString(), equalTo("c"));
assertThat(topHits.getHits().getAt(1).getId(), equalTo("c"));
assertThat(topHits.getHits().getAt(1).getType(), equalTo("comment"));

View File

@ -30,6 +30,7 @@ import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.search.ShardSearchFailure;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.query.*;
@ -629,7 +630,8 @@ public class SearchQueryTests extends ElasticsearchIntegrationTest {
}
private void idsFilterTests(String index) throws Exception {
assertAcked(client().admin().indices().prepareCreate("test")
Settings indexSettings = ImmutableSettings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_1_4_2.id).build();
assertAcked(client().admin().indices().prepareCreate("test").setSettings(indexSettings)
.addMapping("type1", jsonBuilder().startObject().startObject("type1")
.startObject("_id").field("index", index).endObject()
.endObject().endObject()));

View File

@ -1562,7 +1562,6 @@ public class SimpleSortTests extends ElasticsearchIntegrationTest {
final boolean timestampDocValues = maybeDocValues();
assertAcked(prepareCreate("test")
.addMapping("type", XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_id").field("index", !idDocValues || randomBoolean() ? "not_analyzed" : "no").startObject("fielddata").field("format", idDocValues ? "doc_values" : null).endObject().endObject()
.startObject("_timestamp").field("enabled", true).field("store", true).field("index", !timestampDocValues || randomBoolean() ? "not_analyzed" : "no").startObject("fielddata").field("format", timestampDocValues ? "doc_values" : null).endObject().endObject()
.endObject().endObject()));
ensureGreen();
@ -1588,7 +1587,8 @@ public class SimpleSortTests extends ElasticsearchIntegrationTest {
previous = uid;
}
/*searchResponse = client().prepareSearch()
/*
searchResponse = client().prepareSearch()
.setQuery(matchAllQuery())
.setSize(randomIntBetween(1, numDocs + 5))
.addSort("_id", order)

View File

@ -312,11 +312,6 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
XContentBuilder mappings = null;
if (frequently() && randomDynamicTemplates()) {
mappings = XContentFactory.jsonBuilder().startObject().startObject("_default_");
if (randomBoolean()) {
mappings.startObject(IdFieldMapper.NAME)
.field("index", randomFrom("not_analyzed", "no"))
.endObject();
}
if (randomBoolean()) {
mappings.startObject(TypeFieldMapper.NAME)
.field("index", randomFrom("no", "not_analyzed"))