Allow Uid#decodeId to decode from a byte array slice (#26987)
Today we only allow decoding byte arrays where the data has a 0 offset and the same length as the array. Allowing IDs to be decoded from a slice of a byte array will make decoding cheaper if the ID is, for instance, coming from a term dictionary or a BytesRef. Relates to #26931
This commit is contained in:
parent
93a47cf860
commit
047a916169
|
@ -135,36 +135,36 @@ public final class Uid {
|
||||||
// 'xxx=' and 'xxx' could be considered the same id
|
// 'xxx=' and 'xxx' could be considered the same id
|
||||||
final int length = id.length();
|
final int length = id.length();
|
||||||
switch (length & 0x03) {
|
switch (length & 0x03) {
|
||||||
case 0:
|
case 0:
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
return false;
|
|
||||||
case 2:
|
|
||||||
// the last 2 symbols (12 bits) are encoding 1 byte (8 bits)
|
|
||||||
// so the last symbol only actually uses 8-6=2 bits and can only take 4 values
|
|
||||||
char last = id.charAt(length - 1);
|
|
||||||
if (last != 'A' && last != 'Q' && last != 'g' && last != 'w') {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
case 2:
|
||||||
break;
|
// the last 2 symbols (12 bits) are encoding 1 byte (8 bits)
|
||||||
case 3:
|
// so the last symbol only actually uses 8-6=2 bits and can only take 4 values
|
||||||
// The last 3 symbols (18 bits) are encoding 2 bytes (16 bits)
|
char last = id.charAt(length - 1);
|
||||||
// so the last symbol only actually uses 16-12=4 bits and can only take 16 values
|
if (last != 'A' && last != 'Q' && last != 'g' && last != 'w') {
|
||||||
last = id.charAt(length - 1);
|
return false;
|
||||||
if (last != 'A' && last != 'E' && last != 'I' && last != 'M' && last != 'Q'&& last != 'U'&& last != 'Y'
|
}
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
// The last 3 symbols (18 bits) are encoding 2 bytes (16 bits)
|
||||||
|
// so the last symbol only actually uses 16-12=4 bits and can only take 16 values
|
||||||
|
last = id.charAt(length - 1);
|
||||||
|
if (last != 'A' && last != 'E' && last != 'I' && last != 'M' && last != 'Q'&& last != 'U'&& last != 'Y'
|
||||||
&& last != 'c'&& last != 'g'&& last != 'k' && last != 'o' && last != 's' && last != 'w'
|
&& last != 'c'&& last != 'g'&& last != 'k' && last != 'o' && last != 's' && last != 'w'
|
||||||
&& last != '0' && last != '4' && last != '8') {
|
&& last != '0' && last != '4' && last != '8') {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
// number & 0x03 is always in [0,3]
|
// number & 0x03 is always in [0,3]
|
||||||
throw new AssertionError("Impossible case");
|
throw new AssertionError("Impossible case");
|
||||||
}
|
}
|
||||||
for (int i = 0; i < length; ++i) {
|
for (int i = 0; i < length; ++i) {
|
||||||
final char c = id.charAt(i);
|
final char c = id.charAt(i);
|
||||||
final boolean allowed =
|
final boolean allowed =
|
||||||
(c >= '0' && c <= '9') ||
|
(c >= '0' && c <= '9') ||
|
||||||
(c >= 'A' && c <= 'Z') ||
|
(c >= 'A' && c <= 'Z') ||
|
||||||
(c >= 'a' && c <= 'z') ||
|
(c >= 'a' && c <= 'z') ||
|
||||||
c == '-' || c == '_';
|
c == '-' || c == '_';
|
||||||
|
@ -244,16 +244,16 @@ public final class Uid {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String decodeNumericId(byte[] idBytes) {
|
private static String decodeNumericId(byte[] idBytes, int offset, int len) {
|
||||||
assert Byte.toUnsignedInt(idBytes[0]) == NUMERIC;
|
assert Byte.toUnsignedInt(idBytes[offset]) == NUMERIC;
|
||||||
int length = (idBytes.length - 1) * 2;
|
int length = (len - 1) * 2;
|
||||||
char[] chars = new char[length];
|
char[] chars = new char[length];
|
||||||
for (int i = 1; i < idBytes.length; ++i) {
|
for (int i = 1; i < len; ++i) {
|
||||||
final int b = Byte.toUnsignedInt(idBytes[i]);
|
final int b = Byte.toUnsignedInt(idBytes[offset + i]);
|
||||||
final int b1 = (b >>> 4);
|
final int b1 = (b >>> 4);
|
||||||
final int b2 = b & 0x0f;
|
final int b2 = b & 0x0f;
|
||||||
chars[(i - 1) * 2] = (char) (b1 + '0');
|
chars[(i - 1) * 2] = (char) (b1 + '0');
|
||||||
if (i == idBytes.length - 1 && b2 == 0x0f) {
|
if (i == len - 1 && b2 == 0x0f) {
|
||||||
length--;
|
length--;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -262,15 +262,17 @@ public final class Uid {
|
||||||
return new String(chars, 0, length);
|
return new String(chars, 0, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String decodeUtf8Id(byte[] idBytes) {
|
private static String decodeUtf8Id(byte[] idBytes, int offset, int length) {
|
||||||
assert Byte.toUnsignedInt(idBytes[0]) == UTF8;
|
assert Byte.toUnsignedInt(idBytes[offset]) == UTF8;
|
||||||
return new BytesRef(idBytes, 1, idBytes.length - 1).utf8ToString();
|
return new BytesRef(idBytes, offset + 1, length - 1).utf8ToString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String decodeBase64Id(byte[] idBytes) {
|
private static String decodeBase64Id(byte[] idBytes, int offset, int length) {
|
||||||
assert Byte.toUnsignedInt(idBytes[0]) <= BASE64_ESCAPE;
|
assert Byte.toUnsignedInt(idBytes[offset]) <= BASE64_ESCAPE;
|
||||||
if (Byte.toUnsignedInt(idBytes[0]) == BASE64_ESCAPE) {
|
if (Byte.toUnsignedInt(idBytes[offset]) == BASE64_ESCAPE) {
|
||||||
idBytes = Arrays.copyOfRange(idBytes, 1, idBytes.length);
|
idBytes = Arrays.copyOfRange(idBytes, offset + 1, offset + length);
|
||||||
|
} else if ((idBytes.length == length && offset == 0) == false) { // no need to copy if it's not a slice
|
||||||
|
idBytes = Arrays.copyOfRange(idBytes, offset, offset + length);
|
||||||
}
|
}
|
||||||
return Base64.getUrlEncoder().withoutPadding().encodeToString(idBytes);
|
return Base64.getUrlEncoder().withoutPadding().encodeToString(idBytes);
|
||||||
}
|
}
|
||||||
|
@ -278,17 +280,23 @@ public final class Uid {
|
||||||
/** Decode an indexed id back to its original form.
|
/** Decode an indexed id back to its original form.
|
||||||
* @see #encodeId */
|
* @see #encodeId */
|
||||||
public static String decodeId(byte[] idBytes) {
|
public static String decodeId(byte[] idBytes) {
|
||||||
if (idBytes.length == 0) {
|
return decodeId(idBytes, 0, idBytes.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Decode an indexed id back to its original form.
|
||||||
|
* @see #encodeId */
|
||||||
|
public static String decodeId(byte[] idBytes, int offset, int length) {
|
||||||
|
if (length == 0) {
|
||||||
throw new IllegalArgumentException("Ids can't be empty");
|
throw new IllegalArgumentException("Ids can't be empty");
|
||||||
}
|
}
|
||||||
final int magicChar = Byte.toUnsignedInt(idBytes[0]);
|
final int magicChar = Byte.toUnsignedInt(idBytes[offset]);
|
||||||
switch (magicChar) {
|
switch (magicChar) {
|
||||||
case NUMERIC:
|
case NUMERIC:
|
||||||
return decodeNumericId(idBytes);
|
return decodeNumericId(idBytes, offset, length);
|
||||||
case UTF8:
|
case UTF8:
|
||||||
return decodeUtf8Id(idBytes);
|
return decodeUtf8Id(idBytes, offset, length);
|
||||||
default:
|
default:
|
||||||
return decodeBase64Id(idBytes);
|
return decodeBase64Id(idBytes, offset, length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -79,7 +79,7 @@ public class UidTests extends ESTestCase {
|
||||||
for (int iter = 0; iter < iters; ++iter) {
|
for (int iter = 0; iter < iters; ++iter) {
|
||||||
final String id = TestUtil.randomRealisticUnicodeString(random(), 1, 10);
|
final String id = TestUtil.randomRealisticUnicodeString(random(), 1, 10);
|
||||||
BytesRef encoded = Uid.encodeId(id);
|
BytesRef encoded = Uid.encodeId(id);
|
||||||
assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
|
assertEquals(id, doDecodeId(encoded));
|
||||||
assertTrue(encoded.length <= 1 + new BytesRef(id).length);
|
assertTrue(encoded.length <= 1 + new BytesRef(id).length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -93,7 +93,7 @@ public class UidTests extends ESTestCase {
|
||||||
id = "0" + id;
|
id = "0" + id;
|
||||||
}
|
}
|
||||||
BytesRef encoded = Uid.encodeId(id);
|
BytesRef encoded = Uid.encodeId(id);
|
||||||
assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
|
assertEquals(id, doDecodeId(encoded));
|
||||||
assertEquals(1 + (id.length() + 1) / 2, encoded.length);
|
assertEquals(1 + (id.length() + 1) / 2, encoded.length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -105,9 +105,26 @@ public class UidTests extends ESTestCase {
|
||||||
random().nextBytes(binaryId);
|
random().nextBytes(binaryId);
|
||||||
final String id = Base64.getUrlEncoder().withoutPadding().encodeToString(binaryId);
|
final String id = Base64.getUrlEncoder().withoutPadding().encodeToString(binaryId);
|
||||||
BytesRef encoded = Uid.encodeId(id);
|
BytesRef encoded = Uid.encodeId(id);
|
||||||
assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
|
assertEquals(id, doDecodeId(encoded));
|
||||||
assertTrue(encoded.length <= 1 + binaryId.length);
|
assertTrue(encoded.length <= 1 + binaryId.length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static String doDecodeId(BytesRef encoded) {
|
||||||
|
|
||||||
|
if (randomBoolean()) {
|
||||||
|
return Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length));
|
||||||
|
} else {
|
||||||
|
if (randomBoolean()) {
|
||||||
|
BytesRef slicedCopy = new BytesRef(randomIntBetween(encoded.length + 1, encoded.length + 100));
|
||||||
|
slicedCopy.offset = randomIntBetween(1, slicedCopy.bytes.length - encoded.length);
|
||||||
|
slicedCopy.length = encoded.length;
|
||||||
|
System.arraycopy(encoded.bytes, encoded.offset, slicedCopy.bytes, slicedCopy.offset, encoded.length);
|
||||||
|
assertArrayEquals(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length),
|
||||||
|
Arrays.copyOfRange(slicedCopy.bytes, slicedCopy.offset, slicedCopy.offset + slicedCopy.length));
|
||||||
|
encoded = slicedCopy;
|
||||||
|
}
|
||||||
|
return Uid.decodeId(encoded.bytes, encoded.offset, encoded.length);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue