Merge pull request #11233 from jpountz/fix/compressedstring_equals
Internal: Fix CompressedString.equals.
This commit is contained in:
commit
e90740ad74
|
@ -24,28 +24,20 @@ import org.elasticsearch.common.bytes.BytesArray;
|
||||||
import org.elasticsearch.common.bytes.BytesReference;
|
import org.elasticsearch.common.bytes.BytesReference;
|
||||||
import org.elasticsearch.common.io.stream.StreamInput;
|
import org.elasticsearch.common.io.stream.StreamInput;
|
||||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||||
import org.elasticsearch.common.io.stream.Streamable;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* Similar class to the {@link String} class except that it internally stores
|
||||||
|
* data using a compressed representation in order to require less permanent
|
||||||
|
* memory. Note that the compressed string might still sometimes need to be
|
||||||
|
* decompressed in order to perform equality checks or to compute hash codes.
|
||||||
*/
|
*/
|
||||||
public class CompressedString implements Streamable {
|
public final class CompressedString {
|
||||||
|
|
||||||
private byte[] bytes;
|
private final byte[] bytes;
|
||||||
|
private int hashCode;
|
||||||
CompressedString() {
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructor assuming the data provided is compressed (UTF8). It uses the provided
|
|
||||||
* array without copying it.
|
|
||||||
*/
|
|
||||||
public CompressedString(byte[] compressed) {
|
|
||||||
this.bytes = compressed;
|
|
||||||
}
|
|
||||||
|
|
||||||
public CompressedString(BytesReference data) throws IOException {
|
public CompressedString(BytesReference data) throws IOException {
|
||||||
Compressor compressor = CompressorFactory.compressor(data);
|
Compressor compressor = CompressorFactory.compressor(data);
|
||||||
|
@ -55,40 +47,37 @@ public class CompressedString implements Streamable {
|
||||||
} else {
|
} else {
|
||||||
BytesArray bytesArray = data.toBytesArray();
|
BytesArray bytesArray = data.toBytesArray();
|
||||||
this.bytes = CompressorFactory.defaultCompressor().compress(bytesArray.array(), bytesArray.arrayOffset(), bytesArray.length());
|
this.bytes = CompressorFactory.defaultCompressor().compress(bytesArray.array(), bytesArray.arrayOffset(), bytesArray.length());
|
||||||
|
assert CompressorFactory.compressor(bytes) == CompressorFactory.defaultCompressor();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a new compressed string, assuming the bytes are UTF8, by copying it over.
|
|
||||||
*
|
|
||||||
* @param data The byte array
|
|
||||||
* @param offset Offset into the byte array
|
|
||||||
* @param length The length of the data
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public CompressedString(byte[] data, int offset, int length) throws IOException {
|
public CompressedString(byte[] data, int offset, int length) throws IOException {
|
||||||
Compressor compressor = CompressorFactory.compressor(data, offset, length);
|
this(new BytesArray(data, offset, length));
|
||||||
if (compressor != null) {
|
}
|
||||||
// already compressed...
|
|
||||||
this.bytes = Arrays.copyOfRange(data, offset, offset + length);
|
public CompressedString(byte[] data) throws IOException {
|
||||||
} else {
|
this(data, 0, data.length);
|
||||||
// default to LZF
|
|
||||||
this.bytes = CompressorFactory.defaultCompressor().compress(data, offset, length);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public CompressedString(String str) throws IOException {
|
public CompressedString(String str) throws IOException {
|
||||||
BytesRef result = new BytesRef(str);
|
this(new BytesArray(new BytesRef(str)));
|
||||||
this.bytes = CompressorFactory.defaultCompressor().compress(result.bytes, result.offset, result.length);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Return the compressed bytes. */
|
||||||
public byte[] compressed() {
|
public byte[] compressed() {
|
||||||
return this.bytes;
|
return this.bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] uncompressed() throws IOException {
|
/** Return the uncompressed bytes. */
|
||||||
|
public byte[] uncompressed() {
|
||||||
Compressor compressor = CompressorFactory.compressor(bytes);
|
Compressor compressor = CompressorFactory.compressor(bytes);
|
||||||
return compressor.uncompress(bytes, 0, bytes.length);
|
assert compressor != null;
|
||||||
|
try {
|
||||||
|
return compressor.uncompress(bytes, 0, bytes.length);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new IllegalStateException("Cannot decompress compressed string", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public String string() throws IOException {
|
public String string() throws IOException {
|
||||||
|
@ -96,18 +85,11 @@ public class CompressedString implements Streamable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static CompressedString readCompressedString(StreamInput in) throws IOException {
|
public static CompressedString readCompressedString(StreamInput in) throws IOException {
|
||||||
CompressedString compressedString = new CompressedString();
|
byte[] bytes = new byte[in.readVInt()];
|
||||||
compressedString.readFrom(in);
|
|
||||||
return compressedString;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void readFrom(StreamInput in) throws IOException {
|
|
||||||
bytes = new byte[in.readVInt()];
|
|
||||||
in.readBytes(bytes, 0, bytes.length);
|
in.readBytes(bytes, 0, bytes.length);
|
||||||
|
return new CompressedString(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void writeTo(StreamOutput out) throws IOException {
|
public void writeTo(StreamOutput out) throws IOException {
|
||||||
out.writeVInt(bytes.length);
|
out.writeVInt(bytes.length);
|
||||||
out.writeBytes(bytes);
|
out.writeBytes(bytes);
|
||||||
|
@ -120,14 +102,23 @@ public class CompressedString implements Streamable {
|
||||||
|
|
||||||
CompressedString that = (CompressedString) o;
|
CompressedString that = (CompressedString) o;
|
||||||
|
|
||||||
if (!Arrays.equals(bytes, that.bytes)) return false;
|
if (Arrays.equals(compressed(), that.compressed())) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return Arrays.equals(uncompressed(), that.uncompressed());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return bytes != null ? Arrays.hashCode(bytes) : 0;
|
if (hashCode == 0) {
|
||||||
|
int h = Arrays.hashCode(uncompressed());
|
||||||
|
if (h == 0) {
|
||||||
|
h = 1;
|
||||||
|
}
|
||||||
|
hashCode = h;
|
||||||
|
}
|
||||||
|
return hashCode;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -20,8 +20,12 @@
|
||||||
package org.elasticsearch.common.compress;
|
package org.elasticsearch.common.compress;
|
||||||
|
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
import org.elasticsearch.common.bytes.BytesReference;
|
||||||
|
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||||
|
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -40,6 +44,12 @@ public class CompressedStringTests extends ElasticsearchTestCase {
|
||||||
simpleTests("lzf");
|
simpleTests("lzf");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void assertEquals(CompressedString s1, CompressedString s2) {
|
||||||
|
Assert.assertEquals(s1, s2);
|
||||||
|
assertArrayEquals(s1.uncompressed(), s2.uncompressed());
|
||||||
|
assertEquals(s1.hashCode(), s2.hashCode());
|
||||||
|
}
|
||||||
|
|
||||||
public void simpleTests(String compressor) throws IOException {
|
public void simpleTests(String compressor) throws IOException {
|
||||||
CompressorFactory.configure(ImmutableSettings.settingsBuilder().put("compress.default.type", compressor).build());
|
CompressorFactory.configure(ImmutableSettings.settingsBuilder().put("compress.default.type", compressor).build());
|
||||||
String str = "this is a simple string";
|
String str = "this is a simple string";
|
||||||
|
@ -51,9 +61,9 @@ public class CompressedStringTests extends ElasticsearchTestCase {
|
||||||
CompressedString cstr2 = new CompressedString(str2);
|
CompressedString cstr2 = new CompressedString(str2);
|
||||||
assertThat(cstr2.string(), not(equalTo(str)));
|
assertThat(cstr2.string(), not(equalTo(str)));
|
||||||
assertThat(new CompressedString(str2), not(equalTo(cstr)));
|
assertThat(new CompressedString(str2), not(equalTo(cstr)));
|
||||||
assertThat(new CompressedString(str2), equalTo(cstr2));
|
assertEquals(new CompressedString(str2), cstr2);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandom() throws IOException {
|
public void testRandom() throws IOException {
|
||||||
String compressor = "lzf";
|
String compressor = "lzf";
|
||||||
CompressorFactory.configure(ImmutableSettings.settingsBuilder().put("compress.default.type", compressor).build());
|
CompressorFactory.configure(ImmutableSettings.settingsBuilder().put("compress.default.type", compressor).build());
|
||||||
|
@ -64,4 +74,40 @@ public class CompressedStringTests extends ElasticsearchTestCase {
|
||||||
assertThat(compressedString.string(), equalTo(string));
|
assertThat(compressedString.string(), equalTo(string));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testDifferentCompressedRepresentation() throws Exception {
|
||||||
|
byte[] b = "abcdefghijabcdefghij".getBytes("UTF-8");
|
||||||
|
CompressorFactory.defaultCompressor();
|
||||||
|
|
||||||
|
Compressor compressor = CompressorFactory.defaultCompressor();
|
||||||
|
BytesStreamOutput bout = new BytesStreamOutput();
|
||||||
|
StreamOutput out = compressor.streamOutput(bout);
|
||||||
|
out.writeBytes(b);
|
||||||
|
out.flush();
|
||||||
|
out.writeBytes(b);
|
||||||
|
out.close();
|
||||||
|
final BytesReference b1 = bout.bytes();
|
||||||
|
|
||||||
|
bout = new BytesStreamOutput();
|
||||||
|
out = compressor.streamOutput(bout);
|
||||||
|
out.writeBytes(b);
|
||||||
|
out.writeBytes(b);
|
||||||
|
out.close();
|
||||||
|
final BytesReference b2 = bout.bytes();
|
||||||
|
|
||||||
|
// because of the intermediate flush, the two compressed representations
|
||||||
|
// are different. It can also happen for other reasons like if hash tables
|
||||||
|
// of different size are being used
|
||||||
|
assertFalse(b1.equals(b2));
|
||||||
|
// we used the compressed representation directly and did not recompress
|
||||||
|
assertArrayEquals(b1.toBytes(), new CompressedString(b1).compressed());
|
||||||
|
assertArrayEquals(b2.toBytes(), new CompressedString(b2).compressed());
|
||||||
|
// but compressedstring instances are still equal
|
||||||
|
assertEquals(new CompressedString(b1), new CompressedString(b2));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testHashCode() throws IOException {
|
||||||
|
assertFalse(new CompressedString("a").hashCode() == new CompressedString("b").hashCode());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue