BytesReference.Helper should never materialize a byte[] array.

The current implementations of BytesReference.Helper.bytesEquals and
bytesHashCode materialize a byte[] when passed an instance that returns `false`
in `hasArray()`. They should instead do a byte-by-byte comparison/hashcode
computation without materializing the array.

Close #5517
This commit is contained in:
Adrien Grand 2014-03-24 16:03:39 +01:00
parent b4e504df99
commit badedf657a
3 changed files with 108 additions and 24 deletions

View File

@ -20,6 +20,7 @@ package org.elasticsearch.common.bytes;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.util.UnsafeUtils;
import org.jboss.netty.buffer.ChannelBuffer;
import java.io.IOException;
@ -40,32 +41,49 @@ public interface BytesReference {
if (a.length() != b.length()) {
return false;
}
if (!a.hasArray()) {
a = a.toBytesArray();
if (a.hasArray() && b.hasArray()) {
// court-circuit to compare several bytes at once
return UnsafeUtils.equals(a.array(), a.arrayOffset(), b.array(), b.arrayOffset(), a.length());
} else {
return slowBytesEquals(a, b);
}
if (!b.hasArray()) {
b = b.toBytesArray();
}
int bUpTo = b.arrayOffset();
final byte[] aArray = a.array();
final byte[] bArray = b.array();
final int end = a.arrayOffset() + a.length();
for (int aUpTo = a.arrayOffset(); aUpTo < end; aUpTo++, bUpTo++) {
if (aArray[aUpTo] != bArray[bUpTo]) {
// pkg-private for testing
static boolean slowBytesEquals(BytesReference a, BytesReference b) {
assert a.length() == b.length();
for (int i = 0, end = a.length(); i < end; ++i) {
if (a.get(i) != b.get(i)) {
return false;
}
}
return true;
}
public static int bytesHashCode(BytesReference a) {
if (!a.hasArray()) {
a = a.toBytesArray();
if (a.hasArray()) {
return hashCode(a.array(), a.arrayOffset(), a.length());
} else {
return slowHashCode(a);
}
int result = 0;
final int end = a.arrayOffset() + a.length();
for (int i = a.arrayOffset(); i < end; i++) {
result = 31 * result + a.array()[i];
}
// pkg-private for testing
static int hashCode(byte[] array, int offset, int length) {
int result = 1;
for (int i = offset, end = offset + length; i < end; ++i) {
result = 31 * result + array[i];
}
return result;
}
// pkg-private for testing
static int slowHashCode(BytesReference a) {
int result = 1;
for (int i = 0, end = a.length(); i < end; ++i) {
result = 31 * result + a.get(i);
}
return result;
}

View File

@ -73,13 +73,18 @@ public enum UnsafeUtils {
/** Compare the two given {@link BytesRef}s for equality. */
public static boolean equals(BytesRef b1, BytesRef b2) {
int len = b1.length;
if (b2.length != len) {
if (b1.length != b2.length) {
return false;
}
int o1 = b1.offset, o2 = b2.offset;
return equals(b1.bytes, b1.offset, b2.bytes, b2.offset, b1.length);
}
/**
* Compare <code>b1[o1:o1+len)</code>against <code>b1[o2:o2+len)</code>.
*/
public static boolean equals(byte[] b1, int o1, byte[] b2, int o2, int len) {
while (len >= 8) {
if (readLong(b1.bytes, o1) != readLong(b2.bytes, o2)) {
if (readLong(b1, o1) != readLong(b2, o2)) {
return false;
}
len -= 8;
@ -87,7 +92,7 @@ public enum UnsafeUtils {
o2 += 8;
}
if (len >= 4) {
if (readInt(b1.bytes, o1) != readInt(b2.bytes, o2)) {
if (readInt(b1, o1) != readInt(b2, o2)) {
return false;
}
len -= 4;
@ -95,7 +100,7 @@ public enum UnsafeUtils {
o2 += 4;
}
if (len >= 2) {
if (readShort(b1.bytes, o1) != readShort(b2.bytes, o2)) {
if (readShort(b1, o1) != readShort(b2, o2)) {
return false;
}
len -= 2;
@ -103,7 +108,7 @@ public enum UnsafeUtils {
o2 += 2;
}
if (len == 1) {
if (readByte(b1.bytes, o1) != readByte(b2.bytes, o2)) {
if (readByte(b1, o1) != readByte(b2, o2)) {
return false;
}
} else {

View File

@ -0,0 +1,61 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.bytes;
import org.elasticsearch.test.ElasticsearchTestCase;
import java.util.Arrays;
public class BytesReferenceTests extends ElasticsearchTestCase {
public void testEquals() {
final int len = randomIntBetween(0, randomBoolean() ? 10: 100000);
final int offset1 = randomInt(5);
final byte[] array1 = new byte[offset1 + len + randomInt(5)];
getRandom().nextBytes(array1);
final int offset2 = randomInt(offset1);
final byte[] array2 = Arrays.copyOfRange(array1, offset1 - offset2, array1.length);
final BytesArray b1 = new BytesArray(array1, offset1, len);
final BytesArray b2 = new BytesArray(array2, offset2, len);
assertTrue(BytesReference.Helper.bytesEqual(b1, b2));
assertTrue(BytesReference.Helper.slowBytesEquals(b1, b2));
assertEquals(Arrays.hashCode(b1.toBytes()), b1.hashCode());
assertEquals(BytesReference.Helper.bytesHashCode(b1), BytesReference.Helper.slowHashCode(b2));
// test same instance
assertTrue(BytesReference.Helper.bytesEqual(b1, b1));
assertTrue(BytesReference.Helper.slowBytesEquals(b1, b1));
assertEquals(BytesReference.Helper.bytesHashCode(b1), BytesReference.Helper.slowHashCode(b1));
if (len > 0) {
// test different length
BytesArray differentLen = new BytesArray(array1, offset1, randomInt(len - 1));
assertFalse(BytesReference.Helper.bytesEqual(b1, differentLen));
// test changed bytes
array1[offset1 + randomInt(len - 1)] += 13;
assertFalse(BytesReference.Helper.bytesEqual(b1, b2));
assertFalse(BytesReference.Helper.slowBytesEquals(b1, b2));
}
}
}