Merge pull request #11233 from jpountz/fix/compressedstring_equals
Internal: Fix CompressedString.equals.
This commit is contained in:
commit
e90740ad74
|
@ -24,28 +24,20 @@ import org.elasticsearch.common.bytes.BytesArray;
|
|||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Streamable;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
*
|
||||
* Similar class to the {@link String} class except that it internally stores
|
||||
* data using a compressed representation in order to require less permanent
|
||||
* memory. Note that the compressed string might still sometimes need to be
|
||||
* decompressed in order to perform equality checks or to compute hash codes.
|
||||
*/
|
||||
public class CompressedString implements Streamable {
|
||||
public final class CompressedString {
|
||||
|
||||
private byte[] bytes;
|
||||
|
||||
CompressedString() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor assuming the data provided is compressed (UTF8). It uses the provided
|
||||
* array without copying it.
|
||||
*/
|
||||
public CompressedString(byte[] compressed) {
|
||||
this.bytes = compressed;
|
||||
}
|
||||
private final byte[] bytes;
|
||||
private int hashCode;
|
||||
|
||||
public CompressedString(BytesReference data) throws IOException {
|
||||
Compressor compressor = CompressorFactory.compressor(data);
|
||||
|
@ -55,40 +47,37 @@ public class CompressedString implements Streamable {
|
|||
} else {
|
||||
BytesArray bytesArray = data.toBytesArray();
|
||||
this.bytes = CompressorFactory.defaultCompressor().compress(bytesArray.array(), bytesArray.arrayOffset(), bytesArray.length());
|
||||
assert CompressorFactory.compressor(bytes) == CompressorFactory.defaultCompressor();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new compressed string, assuming the bytes are UTF8, by copying it over.
|
||||
*
|
||||
* @param data The byte array
|
||||
* @param offset Offset into the byte array
|
||||
* @param length The length of the data
|
||||
* @throws IOException
|
||||
*/
|
||||
public CompressedString(byte[] data, int offset, int length) throws IOException {
|
||||
Compressor compressor = CompressorFactory.compressor(data, offset, length);
|
||||
if (compressor != null) {
|
||||
// already compressed...
|
||||
this.bytes = Arrays.copyOfRange(data, offset, offset + length);
|
||||
} else {
|
||||
// default to LZF
|
||||
this.bytes = CompressorFactory.defaultCompressor().compress(data, offset, length);
|
||||
}
|
||||
this(new BytesArray(data, offset, length));
|
||||
}
|
||||
|
||||
public CompressedString(byte[] data) throws IOException {
|
||||
this(data, 0, data.length);
|
||||
}
|
||||
|
||||
public CompressedString(String str) throws IOException {
|
||||
BytesRef result = new BytesRef(str);
|
||||
this.bytes = CompressorFactory.defaultCompressor().compress(result.bytes, result.offset, result.length);
|
||||
this(new BytesArray(new BytesRef(str)));
|
||||
}
|
||||
|
||||
/** Return the compressed bytes. */
|
||||
public byte[] compressed() {
|
||||
return this.bytes;
|
||||
}
|
||||
|
||||
public byte[] uncompressed() throws IOException {
|
||||
/** Return the uncompressed bytes. */
|
||||
public byte[] uncompressed() {
|
||||
Compressor compressor = CompressorFactory.compressor(bytes);
|
||||
return compressor.uncompress(bytes, 0, bytes.length);
|
||||
assert compressor != null;
|
||||
try {
|
||||
return compressor.uncompress(bytes, 0, bytes.length);
|
||||
} catch (IOException e) {
|
||||
throw new IllegalStateException("Cannot decompress compressed string", e);
|
||||
}
|
||||
}
|
||||
|
||||
public String string() throws IOException {
|
||||
|
@ -96,18 +85,11 @@ public class CompressedString implements Streamable {
|
|||
}
|
||||
|
||||
public static CompressedString readCompressedString(StreamInput in) throws IOException {
|
||||
CompressedString compressedString = new CompressedString();
|
||||
compressedString.readFrom(in);
|
||||
return compressedString;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFrom(StreamInput in) throws IOException {
|
||||
bytes = new byte[in.readVInt()];
|
||||
byte[] bytes = new byte[in.readVInt()];
|
||||
in.readBytes(bytes, 0, bytes.length);
|
||||
return new CompressedString(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeVInt(bytes.length);
|
||||
out.writeBytes(bytes);
|
||||
|
@ -120,14 +102,23 @@ public class CompressedString implements Streamable {
|
|||
|
||||
CompressedString that = (CompressedString) o;
|
||||
|
||||
if (!Arrays.equals(bytes, that.bytes)) return false;
|
||||
if (Arrays.equals(compressed(), that.compressed())) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
return Arrays.equals(uncompressed(), that.uncompressed());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return bytes != null ? Arrays.hashCode(bytes) : 0;
|
||||
if (hashCode == 0) {
|
||||
int h = Arrays.hashCode(uncompressed());
|
||||
if (h == 0) {
|
||||
h = 1;
|
||||
}
|
||||
hashCode = h;
|
||||
}
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,8 +20,12 @@
|
|||
package org.elasticsearch.common.compress;
|
||||
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -40,6 +44,12 @@ public class CompressedStringTests extends ElasticsearchTestCase {
|
|||
simpleTests("lzf");
|
||||
}
|
||||
|
||||
private void assertEquals(CompressedString s1, CompressedString s2) {
|
||||
Assert.assertEquals(s1, s2);
|
||||
assertArrayEquals(s1.uncompressed(), s2.uncompressed());
|
||||
assertEquals(s1.hashCode(), s2.hashCode());
|
||||
}
|
||||
|
||||
public void simpleTests(String compressor) throws IOException {
|
||||
CompressorFactory.configure(ImmutableSettings.settingsBuilder().put("compress.default.type", compressor).build());
|
||||
String str = "this is a simple string";
|
||||
|
@ -51,9 +61,9 @@ public class CompressedStringTests extends ElasticsearchTestCase {
|
|||
CompressedString cstr2 = new CompressedString(str2);
|
||||
assertThat(cstr2.string(), not(equalTo(str)));
|
||||
assertThat(new CompressedString(str2), not(equalTo(cstr)));
|
||||
assertThat(new CompressedString(str2), equalTo(cstr2));
|
||||
assertEquals(new CompressedString(str2), cstr2);
|
||||
}
|
||||
|
||||
|
||||
public void testRandom() throws IOException {
|
||||
String compressor = "lzf";
|
||||
CompressorFactory.configure(ImmutableSettings.settingsBuilder().put("compress.default.type", compressor).build());
|
||||
|
@ -64,4 +74,40 @@ public class CompressedStringTests extends ElasticsearchTestCase {
|
|||
assertThat(compressedString.string(), equalTo(string));
|
||||
}
|
||||
}
|
||||
|
||||
public void testDifferentCompressedRepresentation() throws Exception {
|
||||
byte[] b = "abcdefghijabcdefghij".getBytes("UTF-8");
|
||||
CompressorFactory.defaultCompressor();
|
||||
|
||||
Compressor compressor = CompressorFactory.defaultCompressor();
|
||||
BytesStreamOutput bout = new BytesStreamOutput();
|
||||
StreamOutput out = compressor.streamOutput(bout);
|
||||
out.writeBytes(b);
|
||||
out.flush();
|
||||
out.writeBytes(b);
|
||||
out.close();
|
||||
final BytesReference b1 = bout.bytes();
|
||||
|
||||
bout = new BytesStreamOutput();
|
||||
out = compressor.streamOutput(bout);
|
||||
out.writeBytes(b);
|
||||
out.writeBytes(b);
|
||||
out.close();
|
||||
final BytesReference b2 = bout.bytes();
|
||||
|
||||
// because of the intermediate flush, the two compressed representations
|
||||
// are different. It can also happen for other reasons like if hash tables
|
||||
// of different size are being used
|
||||
assertFalse(b1.equals(b2));
|
||||
// we used the compressed representation directly and did not recompress
|
||||
assertArrayEquals(b1.toBytes(), new CompressedString(b1).compressed());
|
||||
assertArrayEquals(b2.toBytes(), new CompressedString(b2).compressed());
|
||||
// but compressedstring instances are still equal
|
||||
assertEquals(new CompressedString(b1), new CompressedString(b2));
|
||||
}
|
||||
|
||||
public void testHashCode() throws IOException {
|
||||
assertFalse(new CompressedString("a").hashCode() == new CompressedString("b").hashCode());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue