Internal: Fix CompressedString.equals.
CompressedString relied on the assumption that two CompressedString instanes are equal if there compressed representation are equal. Unfortunately this is not always true because the compressed representation also depends on when flush() was called on the output stream or on the size of the hash table that has been used at compression time.
This commit is contained in:
parent
5fbb6a714d
commit
fd1954d74f
|
@ -24,28 +24,20 @@ import org.elasticsearch.common.bytes.BytesArray;
|
|||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Streamable;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
*
|
||||
* Similar class to the {@link String} class except that it internally stores
|
||||
* data using a compressed representation in order to require less permanent
|
||||
* memory. Note that the compressed string might still sometimes need to be
|
||||
* decompressed in order to perform equality checks or to compute hash codes.
|
||||
*/
|
||||
public class CompressedString implements Streamable {
|
||||
public final class CompressedString {
|
||||
|
||||
private byte[] bytes;
|
||||
|
||||
CompressedString() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor assuming the data provided is compressed (UTF8). It uses the provided
|
||||
* array without copying it.
|
||||
*/
|
||||
public CompressedString(byte[] compressed) {
|
||||
this.bytes = compressed;
|
||||
}
|
||||
private final byte[] bytes;
|
||||
private int hashCode;
|
||||
|
||||
public CompressedString(BytesReference data) throws IOException {
|
||||
Compressor compressor = CompressorFactory.compressor(data);
|
||||
|
@ -55,40 +47,37 @@ public class CompressedString implements Streamable {
|
|||
} else {
|
||||
BytesArray bytesArray = data.toBytesArray();
|
||||
this.bytes = CompressorFactory.defaultCompressor().compress(bytesArray.array(), bytesArray.arrayOffset(), bytesArray.length());
|
||||
assert CompressorFactory.compressor(bytes) == CompressorFactory.defaultCompressor();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new compressed string, assuming the bytes are UTF8, by copying it over.
|
||||
*
|
||||
* @param data The byte array
|
||||
* @param offset Offset into the byte array
|
||||
* @param length The length of the data
|
||||
* @throws IOException
|
||||
*/
|
||||
public CompressedString(byte[] data, int offset, int length) throws IOException {
|
||||
Compressor compressor = CompressorFactory.compressor(data, offset, length);
|
||||
if (compressor != null) {
|
||||
// already compressed...
|
||||
this.bytes = Arrays.copyOfRange(data, offset, offset + length);
|
||||
} else {
|
||||
// default to LZF
|
||||
this.bytes = CompressorFactory.defaultCompressor().compress(data, offset, length);
|
||||
}
|
||||
this(new BytesArray(data, offset, length));
|
||||
}
|
||||
|
||||
public CompressedString(byte[] data) throws IOException {
|
||||
this(data, 0, data.length);
|
||||
}
|
||||
|
||||
public CompressedString(String str) throws IOException {
|
||||
BytesRef result = new BytesRef(str);
|
||||
this.bytes = CompressorFactory.defaultCompressor().compress(result.bytes, result.offset, result.length);
|
||||
this(new BytesArray(new BytesRef(str)));
|
||||
}
|
||||
|
||||
/** Return the compressed bytes. */
|
||||
public byte[] compressed() {
|
||||
return this.bytes;
|
||||
}
|
||||
|
||||
public byte[] uncompressed() throws IOException {
|
||||
/** Return the uncompressed bytes. */
|
||||
public byte[] uncompressed() {
|
||||
Compressor compressor = CompressorFactory.compressor(bytes);
|
||||
return compressor.uncompress(bytes, 0, bytes.length);
|
||||
assert compressor != null;
|
||||
try {
|
||||
return compressor.uncompress(bytes, 0, bytes.length);
|
||||
} catch (IOException e) {
|
||||
throw new IllegalStateException("Cannot decompress compressed string", e);
|
||||
}
|
||||
}
|
||||
|
||||
public String string() throws IOException {
|
||||
|
@ -96,18 +85,11 @@ public class CompressedString implements Streamable {
|
|||
}
|
||||
|
||||
public static CompressedString readCompressedString(StreamInput in) throws IOException {
|
||||
CompressedString compressedString = new CompressedString();
|
||||
compressedString.readFrom(in);
|
||||
return compressedString;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFrom(StreamInput in) throws IOException {
|
||||
bytes = new byte[in.readVInt()];
|
||||
byte[] bytes = new byte[in.readVInt()];
|
||||
in.readBytes(bytes, 0, bytes.length);
|
||||
return new CompressedString(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeVInt(bytes.length);
|
||||
out.writeBytes(bytes);
|
||||
|
@ -120,14 +102,23 @@ public class CompressedString implements Streamable {
|
|||
|
||||
CompressedString that = (CompressedString) o;
|
||||
|
||||
if (!Arrays.equals(bytes, that.bytes)) return false;
|
||||
if (Arrays.equals(compressed(), that.compressed())) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
return Arrays.equals(uncompressed(), that.uncompressed());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return bytes != null ? Arrays.hashCode(bytes) : 0;
|
||||
if (hashCode == 0) {
|
||||
int h = Arrays.hashCode(uncompressed());
|
||||
if (h == 0) {
|
||||
h = 1;
|
||||
}
|
||||
hashCode = h;
|
||||
}
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,8 +20,12 @@
|
|||
package org.elasticsearch.common.compress;
|
||||
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -40,6 +44,12 @@ public class CompressedStringTests extends ElasticsearchTestCase {
|
|||
simpleTests("lzf");
|
||||
}
|
||||
|
||||
private void assertEquals(CompressedString s1, CompressedString s2) {
|
||||
Assert.assertEquals(s1, s2);
|
||||
assertArrayEquals(s1.uncompressed(), s2.uncompressed());
|
||||
assertEquals(s1.hashCode(), s2.hashCode());
|
||||
}
|
||||
|
||||
public void simpleTests(String compressor) throws IOException {
|
||||
CompressorFactory.configure(ImmutableSettings.settingsBuilder().put("compress.default.type", compressor).build());
|
||||
String str = "this is a simple string";
|
||||
|
@ -51,9 +61,9 @@ public class CompressedStringTests extends ElasticsearchTestCase {
|
|||
CompressedString cstr2 = new CompressedString(str2);
|
||||
assertThat(cstr2.string(), not(equalTo(str)));
|
||||
assertThat(new CompressedString(str2), not(equalTo(cstr)));
|
||||
assertThat(new CompressedString(str2), equalTo(cstr2));
|
||||
assertEquals(new CompressedString(str2), cstr2);
|
||||
}
|
||||
|
||||
|
||||
public void testRandom() throws IOException {
|
||||
String compressor = "lzf";
|
||||
CompressorFactory.configure(ImmutableSettings.settingsBuilder().put("compress.default.type", compressor).build());
|
||||
|
@ -64,4 +74,40 @@ public class CompressedStringTests extends ElasticsearchTestCase {
|
|||
assertThat(compressedString.string(), equalTo(string));
|
||||
}
|
||||
}
|
||||
|
||||
public void testDifferentCompressedRepresentation() throws Exception {
|
||||
byte[] b = "abcdefghijabcdefghij".getBytes("UTF-8");
|
||||
CompressorFactory.defaultCompressor();
|
||||
|
||||
Compressor compressor = CompressorFactory.defaultCompressor();
|
||||
BytesStreamOutput bout = new BytesStreamOutput();
|
||||
StreamOutput out = compressor.streamOutput(bout);
|
||||
out.writeBytes(b);
|
||||
out.flush();
|
||||
out.writeBytes(b);
|
||||
out.close();
|
||||
final BytesReference b1 = bout.bytes();
|
||||
|
||||
bout = new BytesStreamOutput();
|
||||
out = compressor.streamOutput(bout);
|
||||
out.writeBytes(b);
|
||||
out.writeBytes(b);
|
||||
out.close();
|
||||
final BytesReference b2 = bout.bytes();
|
||||
|
||||
// because of the intermediate flush, the two compressed representations
|
||||
// are different. It can also happen for other reasons like if hash tables
|
||||
// of different size are being used
|
||||
assertFalse(b1.equals(b2));
|
||||
// we used the compressed representation directly and did not recompress
|
||||
assertArrayEquals(b1.toBytes(), new CompressedString(b1).compressed());
|
||||
assertArrayEquals(b2.toBytes(), new CompressedString(b2).compressed());
|
||||
// but compressedstring instances are still equal
|
||||
assertEquals(new CompressedString(b1), new CompressedString(b2));
|
||||
}
|
||||
|
||||
public void testHashCode() throws IOException {
|
||||
assertFalse(new CompressedString("a").hashCode() == new CompressedString("b").hashCode());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue