Mappings: Remove the `compress`/`compress_threshold` options of the BinaryFieldMapper.

This option is broken currently since it potentially interprets an incoming
binary value as compressed while it just happens that the first bytes are the
same as the LZF header.
This commit is contained in:
Adrien Grand 2015-05-21 15:12:43 +02:00
parent 5a0c456ac2
commit 461683ac58
4 changed files with 93 additions and 104 deletions

View File

@ -477,22 +477,6 @@ binary type:
Set to `true` to store field values in a column-stride fashion. Set to `true` to store field values in a column-stride fashion.
`compress`::
Set to `true` to compress the stored binary value.
`compress_threshold`::
Compression will only be applied to stored binary fields that are greater
than this size. Defaults to `-1`
NOTE: Enabling compression on stored binary fields only makes sense on large
and highly-compressible values. Otherwise per-field compression is usually not
worth doing as the space savings do not compensate for the overhead of the
compression format. Normally, you should not configure any compression and
just rely on the block compression of stored fields (which is enabled by
default and can't be disabled).
[float] [float]
[[fielddata-filters]] [[fielddata-filters]]
==== Fielddata filters ==== Fielddata filters

View File

@ -178,6 +178,7 @@ A `RoutingMissingException` is now thrown instead.
* The setting `index.mapping.allow_type_wrapper` has been removed. Documents should always be sent without the type as the root element. * The setting `index.mapping.allow_type_wrapper` has been removed. Documents should always be sent without the type as the root element.
* The delete mappings API has been removed. Mapping types can no longer be deleted. * The delete mappings API has been removed. Mapping types can no longer be deleted.
* The `ignore_conflicts` option of the put mappings API has been removed. Conflicts can't be ignored anymore. * The `ignore_conflicts` option of the put mappings API has been removed. Conflicts can't be ignored anymore.
* The `binary` field does not support the `compress` and `compress_threshold` options anymore.
==== Removed type prefix on field names in queries ==== Removed type prefix on field names in queries
Types can no longer be specified on fields within queries. Instead, specify type restrictions in the search request. Types can no longer be specified on fields within queries. Instead, specify type restrictions in the search request.

View File

@ -29,24 +29,19 @@ import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.Base64; import org.elasticsearch.common.Base64;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressorFactory; import org.elasticsearch.common.compress.CompressorFactory;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.fielddata.FieldDataType; import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MergeResult;
import org.elasticsearch.index.mapper.MergeMappingException;
import org.elasticsearch.index.mapper.ParseContext; import org.elasticsearch.index.mapper.ParseContext;
import java.io.IOException; import java.io.IOException;
@ -54,7 +49,6 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import static org.elasticsearch.common.xcontent.support.XContentMapValues.nodeBooleanValue;
import static org.elasticsearch.index.mapper.MapperBuilders.binaryField; import static org.elasticsearch.index.mapper.MapperBuilders.binaryField;
import static org.elasticsearch.index.mapper.core.TypeParsers.parseField; import static org.elasticsearch.index.mapper.core.TypeParsers.parseField;
@ -64,9 +58,11 @@ import static org.elasticsearch.index.mapper.core.TypeParsers.parseField;
public class BinaryFieldMapper extends AbstractFieldMapper<BytesReference> { public class BinaryFieldMapper extends AbstractFieldMapper<BytesReference> {
public static final String CONTENT_TYPE = "binary"; public static final String CONTENT_TYPE = "binary";
private static final ParseField COMPRESS = new ParseField("compress").withAllDeprecated("no replacement, implemented at the codec level");
private static final ParseField COMPRESS_THRESHOLD = new ParseField("compress_threshold").withAllDeprecated("no replacement");
public static class Defaults extends AbstractFieldMapper.Defaults { public static class Defaults extends AbstractFieldMapper.Defaults {
public static final long COMPRESS_THRESHOLD = -1;
public static final FieldType FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); public static final FieldType FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE);
static { static {
@ -77,28 +73,14 @@ public class BinaryFieldMapper extends AbstractFieldMapper<BytesReference> {
public static class Builder extends AbstractFieldMapper.Builder<Builder, BinaryFieldMapper> { public static class Builder extends AbstractFieldMapper.Builder<Builder, BinaryFieldMapper> {
private Boolean compress = null;
private long compressThreshold = Defaults.COMPRESS_THRESHOLD;
public Builder(String name) { public Builder(String name) {
super(name, new FieldType(Defaults.FIELD_TYPE)); super(name, new FieldType(Defaults.FIELD_TYPE));
builder = this; builder = this;
} }
public Builder compress(boolean compress) {
this.compress = compress;
return this;
}
public Builder compressThreshold(long compressThreshold) {
this.compressThreshold = compressThreshold;
return this;
}
@Override @Override
public BinaryFieldMapper build(BuilderContext context) { public BinaryFieldMapper build(BuilderContext context) {
return new BinaryFieldMapper(buildNames(context), fieldType, docValues, compress, compressThreshold, return new BinaryFieldMapper(buildNames(context), fieldType, docValues,
fieldDataSettings, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo); fieldDataSettings, context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
} }
} }
@ -110,19 +92,9 @@ public class BinaryFieldMapper extends AbstractFieldMapper<BytesReference> {
parseField(builder, name, node, parserContext); parseField(builder, name, node, parserContext);
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) { for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
Map.Entry<String, Object> entry = iterator.next(); Map.Entry<String, Object> entry = iterator.next();
String fieldName = Strings.toUnderscoreCase(entry.getKey()); String fieldName = entry.getKey();
Object fieldNode = entry.getValue(); if (parserContext.indexVersionCreated().before(Version.V_2_0_0) &&
if (fieldName.equals("compress") && fieldNode != null) { (COMPRESS.match(fieldName) || COMPRESS_THRESHOLD.match(fieldName))) {
builder.compress(nodeBooleanValue(fieldNode));
iterator.remove();
} else if (fieldName.equals("compress_threshold") && fieldNode != null) {
if (fieldNode instanceof Number) {
builder.compressThreshold(((Number) fieldNode).longValue());
builder.compress(true);
} else {
builder.compressThreshold(ByteSizeValue.parseBytesSizeValue(fieldNode.toString()).bytes());
builder.compress(true);
}
iterator.remove(); iterator.remove();
} }
} }
@ -130,15 +102,9 @@ public class BinaryFieldMapper extends AbstractFieldMapper<BytesReference> {
} }
} }
private Boolean compress; protected BinaryFieldMapper(Names names, FieldType fieldType, Boolean docValues,
private long compressThreshold;
protected BinaryFieldMapper(Names names, FieldType fieldType, Boolean docValues, Boolean compress, long compressThreshold,
@Nullable Settings fieldDataSettings, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) { @Nullable Settings fieldDataSettings, Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
super(names, 1.0f, fieldType, docValues, null, null, null, null, fieldDataSettings, indexSettings, multiFields, copyTo); super(names, 1.0f, fieldType, docValues, null, null, null, null, fieldDataSettings, indexSettings, multiFields, copyTo);
this.compress = compress;
this.compressThreshold = compressThreshold;
} }
@Override @Override
@ -177,7 +143,11 @@ public class BinaryFieldMapper extends AbstractFieldMapper<BytesReference> {
} }
} }
try { try {
return CompressorFactory.uncompressIfNeeded(bytes); if (indexCreatedBefore2x) {
return CompressorFactory.uncompressIfNeeded(bytes);
} else {
return bytes;
}
} catch (IOException e) { } catch (IOException e) {
throw new ElasticsearchParseException("failed to decompress source", e); throw new ElasticsearchParseException("failed to decompress source", e);
} }
@ -199,15 +169,6 @@ public class BinaryFieldMapper extends AbstractFieldMapper<BytesReference> {
if (value == null) { if (value == null) {
return; return;
} }
if (compress != null && compress && !CompressorFactory.isCompressed(value, 0, value.length)) {
if (compressThreshold == -1 || value.length > compressThreshold) {
BytesStreamOutput bStream = new BytesStreamOutput();
StreamOutput stream = CompressorFactory.defaultCompressor().streamOutput(bStream);
stream.writeBytes(value, 0, value.length);
stream.close();
value = bStream.bytes().toBytes();
}
}
if (fieldType().stored()) { if (fieldType().stored()) {
fields.add(new Field(names.indexName(), value, fieldType)); fields.add(new Field(names.indexName(), value, fieldType));
} }
@ -229,39 +190,6 @@ public class BinaryFieldMapper extends AbstractFieldMapper<BytesReference> {
return CONTENT_TYPE; return CONTENT_TYPE;
} }
@Override
protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
super.doXContentBody(builder, includeDefaults, params);
if (compress != null) {
builder.field("compress", compress);
} else if (includeDefaults) {
builder.field("compress", false);
}
if (compressThreshold != -1) {
builder.field("compress_threshold", new ByteSizeValue(compressThreshold).toString());
} else if (includeDefaults) {
builder.field("compress_threshold", -1);
}
}
@Override
public void merge(Mapper mergeWith, MergeResult mergeResult) throws MergeMappingException {
super.merge(mergeWith, mergeResult);
if (!this.getClass().equals(mergeWith.getClass())) {
return;
}
BinaryFieldMapper sourceMergeWith = (BinaryFieldMapper) mergeWith;
if (!mergeResult.simulate()) {
if (sourceMergeWith.compress != null) {
this.compress = sourceMergeWith.compress;
}
if (sourceMergeWith.compressThreshold != -1) {
this.compressThreshold = sourceMergeWith.compressThreshold;
}
}
}
public static class CustomBinaryDocValuesField extends NumberFieldMapper.CustomNumericDocValuesField { public static class CustomBinaryDocValuesField extends NumberFieldMapper.CustomNumericDocValuesField {
public static final FieldType TYPE = new FieldType(); public static final FieldType TYPE = new FieldType();

View File

@ -19,13 +19,26 @@
package org.elasticsearch.index.mapper.binary; package org.elasticsearch.index.mapper.binary;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.compress.CompressorFactory;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.core.BinaryFieldMapper; import org.elasticsearch.index.mapper.core.BinaryFieldMapper;
import org.elasticsearch.test.ElasticsearchSingleNodeTest; import org.elasticsearch.test.ElasticsearchSingleNodeTest;
import org.junit.Test; import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.instanceOf;
@ -33,7 +46,6 @@ import static org.hamcrest.Matchers.instanceOf;
*/ */
public class BinaryMappingTests extends ElasticsearchSingleNodeTest { public class BinaryMappingTests extends ElasticsearchSingleNodeTest {
@Test
public void testDefaultMapping() throws Exception { public void testDefaultMapping() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties") .startObject("properties")
@ -50,4 +62,68 @@ public class BinaryMappingTests extends ElasticsearchSingleNodeTest {
assertThat(fieldMapper.fieldType().stored(), equalTo(false)); assertThat(fieldMapper.fieldType().stored(), equalTo(false));
} }
public void testStoredValue() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "binary")
.field("store", "yes")
.endObject()
.endObject()
.endObject().endObject().string();
DocumentMapper mapper = createIndex("test").mapperService().documentMapperParser().parse(mapping);
// case 1: a simple binary value
final byte[] binaryValue1 = new byte[100];
binaryValue1[56] = 1;
// case 2: a value that looks compressed: this used to fail in 1.x
BytesStreamOutput out = new BytesStreamOutput();
try (StreamOutput compressed = CompressorFactory.defaultCompressor().streamOutput(out)) {
new BytesArray(binaryValue1).writeTo(compressed);
}
final byte[] binaryValue2 = out.bytes().toBytes();
assertTrue(CompressorFactory.isCompressed(binaryValue2));
for (byte[] value : Arrays.asList(binaryValue1, binaryValue2)) {
ParsedDocument doc = mapper.parse("type", "id", XContentFactory.jsonBuilder().startObject().field("field", value).endObject().bytes());
BytesRef indexedValue = doc.rootDoc().getBinaryValue("field");
assertEquals(new BytesRef(value), indexedValue);
FieldMapper<?> fieldMapper = mapper.mappers().smartNameFieldMapper("field");
Object originalValue = fieldMapper.valueForSearch(indexedValue);
assertEquals(new BytesArray(value), originalValue);
}
}
public void testCompressedBackCompat() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "binary")
.field("store", "yes")
.endObject()
.endObject()
.endObject().endObject().string();
Settings settings = ImmutableSettings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_1_5_0).build();
DocumentMapper mapper = createIndex("test", settings).mapperService().documentMapperParser().parse(mapping);
final byte[] original = new byte[100];
original[56] = 1;
BytesStreamOutput out = new BytesStreamOutput();
try (StreamOutput compressed = CompressorFactory.defaultCompressor().streamOutput(out)) {
new BytesArray(original).writeTo(compressed);
}
final byte[] binaryValue = out.bytes().toBytes();
assertTrue(CompressorFactory.isCompressed(binaryValue));
ParsedDocument doc = mapper.parse("type", "id", XContentFactory.jsonBuilder().startObject().field("field", binaryValue).endObject().bytes());
BytesRef indexedValue = doc.rootDoc().getBinaryValue("field");
assertEquals(new BytesRef(binaryValue), indexedValue);
FieldMapper<?> fieldMapper = mapper.mappers().smartNameFieldMapper("field");
Object originalValue = fieldMapper.valueForSearch(indexedValue);
assertEquals(new BytesArray(original), originalValue);
}
} }