Merge pull request #11233 from jpountz/fix/compressedstring_equals

Internal: Fix CompressedString.equals.
This commit is contained in:
Adrien Grand 2015-05-20 08:39:51 +02:00
commit e90740ad74
2 changed files with 86 additions and 49 deletions

View File

@ -24,28 +24,20 @@ import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Streamable;
import java.io.IOException;
import java.util.Arrays;
/**
*
* Similar class to the {@link String} class except that it internally stores
* data using a compressed representation in order to require less permanent
* memory. Note that the compressed string might still sometimes need to be
* decompressed in order to perform equality checks or to compute hash codes.
*/
public class CompressedString implements Streamable {
public final class CompressedString {
private byte[] bytes;
CompressedString() {
}
/**
* Constructor assuming the data provided is compressed (UTF8). It uses the provided
* array without copying it.
*/
public CompressedString(byte[] compressed) {
this.bytes = compressed;
}
private final byte[] bytes;
private int hashCode;
public CompressedString(BytesReference data) throws IOException {
Compressor compressor = CompressorFactory.compressor(data);
@ -55,40 +47,37 @@ public class CompressedString implements Streamable {
} else {
BytesArray bytesArray = data.toBytesArray();
this.bytes = CompressorFactory.defaultCompressor().compress(bytesArray.array(), bytesArray.arrayOffset(), bytesArray.length());
assert CompressorFactory.compressor(bytes) == CompressorFactory.defaultCompressor();
}
}
/**
* Constructs a new compressed string, assuming the bytes are UTF8, by copying it over.
*
* @param data The byte array
* @param offset Offset into the byte array
* @param length The length of the data
* @throws IOException
*/
public CompressedString(byte[] data, int offset, int length) throws IOException {
Compressor compressor = CompressorFactory.compressor(data, offset, length);
if (compressor != null) {
// already compressed...
this.bytes = Arrays.copyOfRange(data, offset, offset + length);
} else {
// default to LZF
this.bytes = CompressorFactory.defaultCompressor().compress(data, offset, length);
}
this(new BytesArray(data, offset, length));
}
public CompressedString(byte[] data) throws IOException {
this(data, 0, data.length);
}
public CompressedString(String str) throws IOException {
BytesRef result = new BytesRef(str);
this.bytes = CompressorFactory.defaultCompressor().compress(result.bytes, result.offset, result.length);
this(new BytesArray(new BytesRef(str)));
}
/** Return the compressed bytes. */
public byte[] compressed() {
return this.bytes;
}
public byte[] uncompressed() throws IOException {
/** Return the uncompressed bytes. */
public byte[] uncompressed() {
Compressor compressor = CompressorFactory.compressor(bytes);
return compressor.uncompress(bytes, 0, bytes.length);
assert compressor != null;
try {
return compressor.uncompress(bytes, 0, bytes.length);
} catch (IOException e) {
throw new IllegalStateException("Cannot decompress compressed string", e);
}
}
public String string() throws IOException {
@ -96,18 +85,11 @@ public class CompressedString implements Streamable {
}
public static CompressedString readCompressedString(StreamInput in) throws IOException {
CompressedString compressedString = new CompressedString();
compressedString.readFrom(in);
return compressedString;
}
@Override
public void readFrom(StreamInput in) throws IOException {
bytes = new byte[in.readVInt()];
byte[] bytes = new byte[in.readVInt()];
in.readBytes(bytes, 0, bytes.length);
return new CompressedString(bytes);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVInt(bytes.length);
out.writeBytes(bytes);
@ -120,14 +102,23 @@ public class CompressedString implements Streamable {
CompressedString that = (CompressedString) o;
if (!Arrays.equals(bytes, that.bytes)) return false;
if (Arrays.equals(compressed(), that.compressed())) {
return true;
}
return true;
return Arrays.equals(uncompressed(), that.uncompressed());
}
@Override
public int hashCode() {
return bytes != null ? Arrays.hashCode(bytes) : 0;
if (hashCode == 0) {
int h = Arrays.hashCode(uncompressed());
if (h == 0) {
h = 1;
}
hashCode = h;
}
return hashCode;
}
@Override

View File

@ -20,8 +20,12 @@
package org.elasticsearch.common.compress;
import org.apache.lucene.util.TestUtil;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
@ -40,6 +44,12 @@ public class CompressedStringTests extends ElasticsearchTestCase {
simpleTests("lzf");
}
private void assertEquals(CompressedString s1, CompressedString s2) {
Assert.assertEquals(s1, s2);
assertArrayEquals(s1.uncompressed(), s2.uncompressed());
assertEquals(s1.hashCode(), s2.hashCode());
}
public void simpleTests(String compressor) throws IOException {
CompressorFactory.configure(ImmutableSettings.settingsBuilder().put("compress.default.type", compressor).build());
String str = "this is a simple string";
@ -51,9 +61,9 @@ public class CompressedStringTests extends ElasticsearchTestCase {
CompressedString cstr2 = new CompressedString(str2);
assertThat(cstr2.string(), not(equalTo(str)));
assertThat(new CompressedString(str2), not(equalTo(cstr)));
assertThat(new CompressedString(str2), equalTo(cstr2));
assertEquals(new CompressedString(str2), cstr2);
}
public void testRandom() throws IOException {
String compressor = "lzf";
CompressorFactory.configure(ImmutableSettings.settingsBuilder().put("compress.default.type", compressor).build());
@ -64,4 +74,40 @@ public class CompressedStringTests extends ElasticsearchTestCase {
assertThat(compressedString.string(), equalTo(string));
}
}
public void testDifferentCompressedRepresentation() throws Exception {
byte[] b = "abcdefghijabcdefghij".getBytes("UTF-8");
CompressorFactory.defaultCompressor();
Compressor compressor = CompressorFactory.defaultCompressor();
BytesStreamOutput bout = new BytesStreamOutput();
StreamOutput out = compressor.streamOutput(bout);
out.writeBytes(b);
out.flush();
out.writeBytes(b);
out.close();
final BytesReference b1 = bout.bytes();
bout = new BytesStreamOutput();
out = compressor.streamOutput(bout);
out.writeBytes(b);
out.writeBytes(b);
out.close();
final BytesReference b2 = bout.bytes();
// because of the intermediate flush, the two compressed representations
// are different. It can also happen for other reasons like if hash tables
// of different size are being used
assertFalse(b1.equals(b2));
// we used the compressed representation directly and did not recompress
assertArrayEquals(b1.toBytes(), new CompressedString(b1).compressed());
assertArrayEquals(b2.toBytes(), new CompressedString(b2).compressed());
// but compressedstring instances are still equal
assertEquals(new CompressedString(b1), new CompressedString(b2));
}
public void testHashCode() throws IOException {
assertFalse(new CompressedString("a").hashCode() == new CompressedString("b").hashCode());
}
}