Internal: Make CompressedXContent.equals fast again.

We had to make CompressedXContent.equals decompress data to fix some
correctness issues which had the downside of making equals() slow. Now we store
a crc32 alongside compressed data which should help avoid decompress data in
most cases.

Close #11247
This commit is contained in:
Adrien Grand 2015-06-10 09:55:44 +02:00
parent 49bef19878
commit fbe617c37b
2 changed files with 78 additions and 25 deletions

View File

@ -25,9 +25,16 @@ import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.zip.CRC32;
import java.util.zip.CheckedOutputStream;
/**
* Similar class to the {@link String} class except that it internally stores
@ -37,23 +44,79 @@ import java.util.Arrays;
*/
public final class CompressedXContent {
private final byte[] bytes;
private int hashCode;
private static int crc32(BytesReference data) {
OutputStream dummy = new OutputStream() {
@Override
public void write(int b) throws IOException {
// no-op
}
@Override
public void write(byte[] b, int off, int len) throws IOException {
// no-op
}
};
CRC32 crc32 = new CRC32();
try {
data.writeTo(new CheckedOutputStream(dummy, crc32));
} catch (IOException bogus) {
// cannot happen
throw new Error(bogus);
}
return (int) crc32.getValue();
}
private final byte[] bytes;
private final int crc32;
// Used for serialization
private CompressedXContent(byte[] compressed, int crc32) {
this.bytes = compressed;
this.crc32 = crc32;
assertConsistent();
}
/**
* Create a {@link CompressedXContent} out of a {@link ToXContent} instance.
*/
public CompressedXContent(ToXContent xcontent, XContentType type, ToXContent.Params params) throws IOException {
BytesStreamOutput bStream = new BytesStreamOutput();
OutputStream compressedStream = CompressorFactory.defaultCompressor().streamOutput(bStream);
CRC32 crc32 = new CRC32();
OutputStream checkedStream = new CheckedOutputStream(compressedStream, crc32);
try (XContentBuilder builder = XContentFactory.contentBuilder(type, checkedStream)) {
builder.startObject();
xcontent.toXContent(builder, params);
builder.endObject();
}
this.bytes = bStream.bytes().toBytes();
this.crc32 = (int) crc32.getValue();
assertConsistent();
}
/**
* Create a {@link CompressedXContent} out of a serialized {@link ToXContent}
* that may already be compressed.
*/
public CompressedXContent(BytesReference data) throws IOException {
Compressor compressor = CompressorFactory.compressor(data);
if (compressor != null) {
// already compressed...
this.bytes = data.toBytes();
this.crc32 = crc32(new BytesArray(uncompressed()));
} else {
BytesStreamOutput out = new BytesStreamOutput();
try (StreamOutput compressedOutput = CompressorFactory.defaultCompressor().streamOutput(out)) {
try (OutputStream compressedOutput = CompressorFactory.defaultCompressor().streamOutput(out)) {
data.writeTo(compressedOutput);
}
this.bytes = out.bytes().toBytes();
assert CompressorFactory.compressor(new BytesArray(bytes)) != null;
this.crc32 = crc32(data);
}
assertConsistent();
}
private void assertConsistent() {
assert CompressorFactory.compressor(new BytesArray(bytes)) != null;
assert this.crc32 == crc32(new BytesArray(uncompressed()));
}
public CompressedXContent(byte[] data) throws IOException {
@ -88,12 +151,14 @@ public final class CompressedXContent {
}
public static CompressedXContent readCompressedString(StreamInput in) throws IOException {
byte[] bytes = new byte[in.readVInt()];
in.readBytes(bytes, 0, bytes.length);
return new CompressedXContent(bytes);
int crc32 = in.readInt();
byte[] compressed = new byte[in.readVInt()];
in.readBytes(compressed, 0, compressed.length);
return new CompressedXContent(compressed, crc32);
}
public void writeTo(StreamOutput out) throws IOException {
out.writeInt(crc32);
out.writeVInt(bytes.length);
out.writeBytes(bytes);
}
@ -109,19 +174,16 @@ public final class CompressedXContent {
return true;
}
if (crc32 != that.crc32) {
return false;
}
return Arrays.equals(uncompressed(), that.uncompressed());
}
@Override
public int hashCode() {
if (hashCode == 0) {
int h = Arrays.hashCode(uncompressed());
if (h == 0) {
h = 1;
}
hashCode = h;
}
return hashCode;
return crc32;
}
@Override

View File

@ -36,15 +36,12 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.compress.CompressorFactory;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.StringAndBytesText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.concurrent.ReleasableLock;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.mapper.Mapping.SourceTransform;
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
@ -481,13 +478,7 @@ public class DocumentMapper implements ToXContent {
private void refreshSource() throws ElasticsearchGenerationException {
try {
BytesStreamOutput bStream = new BytesStreamOutput();
try (XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON, CompressorFactory.defaultCompressor().streamOutput(bStream))) {
builder.startObject();
toXContent(builder, ToXContent.EMPTY_PARAMS);
builder.endObject();
}
mappingSource = new CompressedXContent(bStream.bytes());
mappingSource = new CompressedXContent(this, XContentType.JSON, ToXContent.EMPTY_PARAMS);
} catch (Exception e) {
throw new ElasticsearchGenerationException("failed to serialize source for type [" + type + "]", e);
}