From fbe617c37b9b79405d95736ead58b3e6c90d5a52 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 10 Jun 2015 09:55:44 +0200 Subject: [PATCH] Internal: Make CompressedXContent.equals fast again. We had to make CompressedXContent.equals decompress data to fix some correctness issues which had the downside of making equals() slow. Now we store a crc32 alongside compressed data which should help avoid decompress data in most cases. Close #11247 --- .../common/compress/CompressedXContent.java | 92 ++++++++++++++++--- .../index/mapper/DocumentMapper.java | 11 +-- 2 files changed, 78 insertions(+), 25 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/common/compress/CompressedXContent.java b/core/src/main/java/org/elasticsearch/common/compress/CompressedXContent.java index 09ced0e29b2..462b91aeef0 100644 --- a/core/src/main/java/org/elasticsearch/common/compress/CompressedXContent.java +++ b/core/src/main/java/org/elasticsearch/common/compress/CompressedXContent.java @@ -25,9 +25,16 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentType; import java.io.IOException; +import java.io.OutputStream; import java.util.Arrays; +import java.util.zip.CRC32; +import java.util.zip.CheckedOutputStream; /** * Similar class to the {@link String} class except that it internally stores @@ -37,23 +44,79 @@ import java.util.Arrays; */ public final class CompressedXContent { - private final byte[] bytes; - private int hashCode; + private static int crc32(BytesReference data) { + OutputStream dummy = new OutputStream() { + @Override + public void write(int b) throws IOException { + // no-op + } + @Override + public void write(byte[] b, int off, int len) throws IOException { + // no-op + } + }; + CRC32 crc32 = new CRC32(); + try { + data.writeTo(new CheckedOutputStream(dummy, crc32)); + } catch (IOException bogus) { + // cannot happen + throw new Error(bogus); + } + return (int) crc32.getValue(); + } + private final byte[] bytes; + private final int crc32; + + // Used for serialization + private CompressedXContent(byte[] compressed, int crc32) { + this.bytes = compressed; + this.crc32 = crc32; + assertConsistent(); + } + + /** + * Create a {@link CompressedXContent} out of a {@link ToXContent} instance. + */ + public CompressedXContent(ToXContent xcontent, XContentType type, ToXContent.Params params) throws IOException { + BytesStreamOutput bStream = new BytesStreamOutput(); + OutputStream compressedStream = CompressorFactory.defaultCompressor().streamOutput(bStream); + CRC32 crc32 = new CRC32(); + OutputStream checkedStream = new CheckedOutputStream(compressedStream, crc32); + try (XContentBuilder builder = XContentFactory.contentBuilder(type, checkedStream)) { + builder.startObject(); + xcontent.toXContent(builder, params); + builder.endObject(); + } + this.bytes = bStream.bytes().toBytes(); + this.crc32 = (int) crc32.getValue(); + assertConsistent(); + } + + /** + * Create a {@link CompressedXContent} out of a serialized {@link ToXContent} + * that may already be compressed. + */ public CompressedXContent(BytesReference data) throws IOException { Compressor compressor = CompressorFactory.compressor(data); if (compressor != null) { // already compressed... this.bytes = data.toBytes(); + this.crc32 = crc32(new BytesArray(uncompressed())); } else { BytesStreamOutput out = new BytesStreamOutput(); - try (StreamOutput compressedOutput = CompressorFactory.defaultCompressor().streamOutput(out)) { + try (OutputStream compressedOutput = CompressorFactory.defaultCompressor().streamOutput(out)) { data.writeTo(compressedOutput); } this.bytes = out.bytes().toBytes(); - assert CompressorFactory.compressor(new BytesArray(bytes)) != null; + this.crc32 = crc32(data); } + assertConsistent(); + } + private void assertConsistent() { + assert CompressorFactory.compressor(new BytesArray(bytes)) != null; + assert this.crc32 == crc32(new BytesArray(uncompressed())); } public CompressedXContent(byte[] data) throws IOException { @@ -88,12 +151,14 @@ public final class CompressedXContent { } public static CompressedXContent readCompressedString(StreamInput in) throws IOException { - byte[] bytes = new byte[in.readVInt()]; - in.readBytes(bytes, 0, bytes.length); - return new CompressedXContent(bytes); + int crc32 = in.readInt(); + byte[] compressed = new byte[in.readVInt()]; + in.readBytes(compressed, 0, compressed.length); + return new CompressedXContent(compressed, crc32); } public void writeTo(StreamOutput out) throws IOException { + out.writeInt(crc32); out.writeVInt(bytes.length); out.writeBytes(bytes); } @@ -109,19 +174,16 @@ public final class CompressedXContent { return true; } + if (crc32 != that.crc32) { + return false; + } + return Arrays.equals(uncompressed(), that.uncompressed()); } @Override public int hashCode() { - if (hashCode == 0) { - int h = Arrays.hashCode(uncompressed()); - if (h == 0) { - h = 1; - } - hashCode = h; - } - return hashCode; + return crc32; } @Override diff --git a/core/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java index 01d4b78a87f..825d0ddc628 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java @@ -36,15 +36,12 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.MapBuilder; import org.elasticsearch.common.compress.CompressedXContent; -import org.elasticsearch.common.compress.CompressorFactory; -import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.text.StringAndBytesText; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.util.concurrent.ReleasableLock; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.mapper.Mapping.SourceTransform; import org.elasticsearch.index.mapper.internal.AllFieldMapper; @@ -481,13 +478,7 @@ public class DocumentMapper implements ToXContent { private void refreshSource() throws ElasticsearchGenerationException { try { - BytesStreamOutput bStream = new BytesStreamOutput(); - try (XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON, CompressorFactory.defaultCompressor().streamOutput(bStream))) { - builder.startObject(); - toXContent(builder, ToXContent.EMPTY_PARAMS); - builder.endObject(); - } - mappingSource = new CompressedXContent(bStream.bytes()); + mappingSource = new CompressedXContent(this, XContentType.JSON, ToXContent.EMPTY_PARAMS); } catch (Exception e) { throw new ElasticsearchGenerationException("failed to serialize source for type [" + type + "]", e); }