Allow Uid#decodeId to decode from a byte array slice (#26987)

Today we can only decode byte arrays in which the data starts at offset 0 and spans the entire array. Allowing IDs to be decoded from a slice makes decoding cheaper when the ID comes from, for instance, a term dictionary or a BytesRef, since the caller no longer has to copy the bytes into a dedicated array first. Relates to #26931
2017-10-12 20:19:14 +02:00 · 2017-10-12 20:19:14 +02:00 · 047a916169
parent 93a47cf860
commit 047a916169
2 changed files with 72 additions and 47 deletions
--- a/core/src/main/java/org/elasticsearch/index/mapper/Uid.java
+++ b/core/src/main/java/org/elasticsearch/index/mapper/Uid.java
@ -135,36 +135,36 @@ public final class Uid {
        // 'xxx=' and 'xxx' could be considered the same id
        final int length = id.length();
        switch (length & 0x03) {
-        case 0:
-            break;
-        case 1:
-            return false;
-        case 2:
-            // the last 2 symbols (12 bits) are encoding 1 byte (8 bits)
-            // so the last symbol only actually uses 8-6=2 bits and can only take 4 values
-            char last = id.charAt(length - 1);
-            if (last != 'A' && last != 'Q' && last != 'g' && last != 'w') {
+            case 0:
+                break;
+            case 1:
                return false;
-            }
-            break;
-        case 3:
-            // The last 3 symbols (18 bits) are encoding 2 bytes (16 bits)
-            // so the last symbol only actually uses 16-12=4 bits and can only take 16 values
-            last = id.charAt(length - 1);
-            if (last != 'A' && last != 'E' && last != 'I' && last != 'M' && last != 'Q'&& last != 'U'&& last != 'Y'
+            case 2:
+                // the last 2 symbols (12 bits) are encoding 1 byte (8 bits)
+                // so the last symbol only actually uses 8-6=2 bits and can only take 4 values
+                char last = id.charAt(length - 1);
+                if (last != 'A' && last != 'Q' && last != 'g' && last != 'w') {
+                    return false;
+                }
+                break;
+            case 3:
+                // The last 3 symbols (18 bits) are encoding 2 bytes (16 bits)
+                // so the last symbol only actually uses 16-12=4 bits and can only take 16 values
+                last = id.charAt(length - 1);
+                if (last != 'A' && last != 'E' && last != 'I' && last != 'M' && last != 'Q'&& last != 'U'&& last != 'Y'
                    && last != 'c'&& last != 'g'&& last != 'k' && last != 'o' && last != 's' && last != 'w'
                    && last != '0' && last != '4' && last != '8') {
-                return false;
-            }
-            break;
-        default:
-            // number & 0x03 is always in [0,3]
-            throw new AssertionError("Impossible case");
+                    return false;
+                }
+                break;
+            default:
+                // number & 0x03 is always in [0,3]
+                throw new AssertionError("Impossible case");
        }
        for (int i = 0; i < length; ++i) {
            final char c = id.charAt(i);
            final boolean allowed =
-                    (c >= '0' && c <= '9') ||
+                (c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'Z') ||
                    (c >= 'a' && c <= 'z') ||
                    c == '-' || c == '_';
@ -244,16 +244,16 @@ public final class Uid {
        }
    }

-    private static String decodeNumericId(byte[] idBytes) {
-        assert Byte.toUnsignedInt(idBytes[0]) == NUMERIC;
-        int length = (idBytes.length - 1) * 2;
+    private static String decodeNumericId(byte[] idBytes, int offset, int len) {
+        assert Byte.toUnsignedInt(idBytes[offset]) == NUMERIC;
+        int length = (len - 1) * 2;
        char[] chars = new char[length];
-        for (int i = 1; i < idBytes.length; ++i) {
-            final int b = Byte.toUnsignedInt(idBytes[i]);
+        for (int i = 1; i < len; ++i) {
+            final int b = Byte.toUnsignedInt(idBytes[offset + i]);
            final int b1 = (b >>> 4);
            final int b2 = b & 0x0f;
            chars[(i - 1) * 2] = (char) (b1 + '0');
-            if (i == idBytes.length - 1 && b2 == 0x0f) {
+            if (i == len - 1 && b2 == 0x0f) {
                length--;
                break;
            }
@ -262,15 +262,17 @@ public final class Uid {
        return new String(chars, 0, length);
    }

-    private static String decodeUtf8Id(byte[] idBytes) {
-        assert Byte.toUnsignedInt(idBytes[0]) == UTF8;
-        return new BytesRef(idBytes, 1, idBytes.length - 1).utf8ToString();
+    private static String decodeUtf8Id(byte[] idBytes, int offset, int length) {
+        assert Byte.toUnsignedInt(idBytes[offset]) == UTF8;
+        return new BytesRef(idBytes, offset + 1, length - 1).utf8ToString();
    }

-    private static String decodeBase64Id(byte[] idBytes) {
-        assert Byte.toUnsignedInt(idBytes[0]) <= BASE64_ESCAPE;
-        if (Byte.toUnsignedInt(idBytes[0]) == BASE64_ESCAPE) {
-            idBytes = Arrays.copyOfRange(idBytes, 1, idBytes.length);
+    private static String decodeBase64Id(byte[] idBytes, int offset, int length) {
+        assert Byte.toUnsignedInt(idBytes[offset]) <= BASE64_ESCAPE;
+        if (Byte.toUnsignedInt(idBytes[offset]) == BASE64_ESCAPE) {
+            idBytes = Arrays.copyOfRange(idBytes, offset + 1, offset + length);
+        } else if ((idBytes.length == length && offset == 0) == false) { // no need to copy if it's not a slice
+            idBytes = Arrays.copyOfRange(idBytes, offset, offset + length);
        }
        return Base64.getUrlEncoder().withoutPadding().encodeToString(idBytes);
    }
@ -278,17 +280,23 @@ public final class Uid {
    /** Decode an indexed id back to its original form.
     *  @see #encodeId */
    public static String decodeId(byte[] idBytes) {
-        if (idBytes.length == 0) {
+        return decodeId(idBytes, 0, idBytes.length);
+    }
+
+    /** Decode the indexed id stored in the {@code length} bytes of {@code idBytes} starting at {@code offset} back to its original form.
+     *  @see #encodeId */
+    public static String decodeId(byte[] idBytes, int offset, int length) {
+        if (length == 0) {
            throw new IllegalArgumentException("Ids can't be empty");
        }
-        final int magicChar = Byte.toUnsignedInt(idBytes[0]);
+        final int magicChar = Byte.toUnsignedInt(idBytes[offset]);
        switch (magicChar) {
-        case NUMERIC:
-            return decodeNumericId(idBytes);
-        case UTF8:
-            return decodeUtf8Id(idBytes);
-        default:
-            return decodeBase64Id(idBytes);
+            case NUMERIC:
+                return decodeNumericId(idBytes, offset, length);
+            case UTF8:
+                return decodeUtf8Id(idBytes, offset, length);
+            default:
+                return decodeBase64Id(idBytes, offset, length);
        }
    }
 }
--- a/core/src/test/java/org/elasticsearch/index/mapper/UidTests.java
+++ b/core/src/test/java/org/elasticsearch/index/mapper/UidTests.java
@ -79,7 +79,7 @@ public class UidTests extends ESTestCase {
        for (int iter = 0; iter < iters; ++iter) {
            final String id = TestUtil.randomRealisticUnicodeString(random(), 1, 10);
            BytesRef encoded = Uid.encodeId(id);
-            assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
+            assertEquals(id, doDecodeId(encoded));
            assertTrue(encoded.length <= 1 + new BytesRef(id).length);
        }
    }
@ -93,7 +93,7 @@ public class UidTests extends ESTestCase {
                id = "0" + id;
            }
            BytesRef encoded = Uid.encodeId(id);
-            assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
+            assertEquals(id, doDecodeId(encoded));
            assertEquals(1 + (id.length() + 1) / 2, encoded.length);
        }
    }
@ -105,9 +105,26 @@ public class UidTests extends ESTestCase {
            random().nextBytes(binaryId);
            final String id = Base64.getUrlEncoder().withoutPadding().encodeToString(binaryId);
            BytesRef encoded = Uid.encodeId(id);
-            assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
+            assertEquals(id, doDecodeId(encoded));
            assertTrue(encoded.length <= 1 + binaryId.length);
        }
    }

+    private static String doDecodeId(BytesRef encoded) {
+
+        if (randomBoolean()) {
+            return Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length));
+        } else {
+            if (randomBoolean()) {
+                BytesRef slicedCopy = new BytesRef(randomIntBetween(encoded.length + 1, encoded.length + 100));
+                slicedCopy.offset = randomIntBetween(1, slicedCopy.bytes.length - encoded.length);
+                slicedCopy.length = encoded.length;
+                System.arraycopy(encoded.bytes, encoded.offset, slicedCopy.bytes, slicedCopy.offset, encoded.length);
+                assertArrayEquals(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length),
+                    Arrays.copyOfRange(slicedCopy.bytes, slicedCopy.offset, slicedCopy.offset + slicedCopy.length));
+                encoded = slicedCopy;
+            }
+            return Uid.decodeId(encoded.bytes, encoded.offset, encoded.length);
+        }
+    }
 }