Allow Uid#decodeId to decode from a byte array slice (#26987)

Today we can only decode byte arrays in which the data starts at offset 0 and spans the entire array. Allowing an ID to be decoded from a slice makes decoding cheaper when the ID comes from, for instance, a term dictionary or a BytesRef. Relates to #26931
2017-10-12 20:19:14 +02:00 · 2017-10-12 20:19:14 +02:00 · 047a916169
parent 93a47cf860
commit 047a916169
2 changed files with 72 additions and 47 deletions
--- a/core/src/main/java/org/elasticsearch/index/mapper/Uid.java
+++ b/core/src/main/java/org/elasticsearch/index/mapper/Uid.java
@ -244,16 +244,16 @@ public final class Uid {
        }
    }

-    private static String decodeNumericId(byte[] idBytes) {
-        assert Byte.toUnsignedInt(idBytes[0]) == NUMERIC;
-        int length = (idBytes.length - 1) * 2;
+    private static String decodeNumericId(byte[] idBytes, int offset, int len) {
+        assert Byte.toUnsignedInt(idBytes[offset]) == NUMERIC;
+        int length = (len - 1) * 2;
        char[] chars = new char[length];
-        for (int i = 1; i < idBytes.length; ++i) {
-            final int b = Byte.toUnsignedInt(idBytes[i]);
+        for (int i = 1; i < len; ++i) {
+            final int b = Byte.toUnsignedInt(idBytes[offset + i]);
            final int b1 = (b >>> 4);
            final int b2 = b & 0x0f;
            chars[(i - 1) * 2] = (char) (b1 + '0');
-            if (i == idBytes.length - 1 && b2 == 0x0f) {
+            if (i == len - 1 && b2 == 0x0f) {
                length--;
                break;
            }
@ -262,15 +262,17 @@ public final class Uid {
        return new String(chars, 0, length);
    }

-    private static String decodeUtf8Id(byte[] idBytes) {
-        assert Byte.toUnsignedInt(idBytes[0]) == UTF8;
-        return new BytesRef(idBytes, 1, idBytes.length - 1).utf8ToString();
+    private static String decodeUtf8Id(byte[] idBytes, int offset, int length) {
+        assert Byte.toUnsignedInt(idBytes[offset]) == UTF8;
+        return new BytesRef(idBytes, offset + 1, length - 1).utf8ToString(); // skip the leading UTF8 marker byte
    }

-    private static String decodeBase64Id(byte[] idBytes) {
-        assert Byte.toUnsignedInt(idBytes[0]) <= BASE64_ESCAPE;
-        if (Byte.toUnsignedInt(idBytes[0]) == BASE64_ESCAPE) {
-            idBytes = Arrays.copyOfRange(idBytes, 1, idBytes.length);
+    private static String decodeBase64Id(byte[] idBytes, int offset, int length) {
+        assert Byte.toUnsignedInt(idBytes[offset]) <= BASE64_ESCAPE;
+        if (Byte.toUnsignedInt(idBytes[offset]) == BASE64_ESCAPE) {
+            idBytes = Arrays.copyOfRange(idBytes, offset + 1, offset + length); // drop the escape byte
+        } else if ((idBytes.length == length && offset == 0) == false) { // a slice must be copied; a whole array is encoded as-is
+            idBytes = Arrays.copyOfRange(idBytes, offset, offset + length);
        }
        return Base64.getUrlEncoder().withoutPadding().encodeToString(idBytes);
    }
@ -278,17 +280,23 @@ public final class Uid {
    /** Decode an indexed id back to its original form.
     *  @see #encodeId */
    public static String decodeId(byte[] idBytes) {
-        if (idBytes.length == 0) {
+        return decodeId(idBytes, 0, idBytes.length); // treat the whole array as the id
+    }
+
+    /** Decode an indexed id held in {@code length} bytes of {@code idBytes} starting at {@code offset} back to its original form.
+     *  @see #encodeId */
+    public static String decodeId(byte[] idBytes, int offset, int length) {
+        if (length == 0) {
            throw new IllegalArgumentException("Ids can't be empty");
        }
-        final int magicChar = Byte.toUnsignedInt(idBytes[0]);
+        final int magicChar = Byte.toUnsignedInt(idBytes[offset]); // first byte of the slice selects the decoder
        switch (magicChar) {
            case NUMERIC:
-            return decodeNumericId(idBytes);
+                return decodeNumericId(idBytes, offset, length);
            case UTF8:
-            return decodeUtf8Id(idBytes);
+                return decodeUtf8Id(idBytes, offset, length);
            default:
-            return decodeBase64Id(idBytes);
+                return decodeBase64Id(idBytes, offset, length);
        }
    }
 }
--- a/core/src/test/java/org/elasticsearch/index/mapper/UidTests.java
+++ b/core/src/test/java/org/elasticsearch/index/mapper/UidTests.java
@ -79,7 +79,7 @@ public class UidTests extends ESTestCase {
        for (int iter = 0; iter < iters; ++iter) {
            final String id = TestUtil.randomRealisticUnicodeString(random(), 1, 10);
            BytesRef encoded = Uid.encodeId(id);
-            assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
+            assertEquals(id, doDecodeId(encoded));
            assertTrue(encoded.length <= 1 + new BytesRef(id).length);
        }
    }
@ -93,7 +93,7 @@ public class UidTests extends ESTestCase {
                id = "0" + id;
            }
            BytesRef encoded = Uid.encodeId(id);
-            assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
+            assertEquals(id, doDecodeId(encoded));
            assertEquals(1 + (id.length() + 1) / 2, encoded.length);
        }
    }
@ -105,9 +105,26 @@ public class UidTests extends ESTestCase {
            random().nextBytes(binaryId);
            final String id = Base64.getUrlEncoder().withoutPadding().encodeToString(binaryId);
            BytesRef encoded = Uid.encodeId(id);
-            assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
+            assertEquals(id, doDecodeId(encoded));
            assertTrue(encoded.length <= 1 + binaryId.length);
        }
    }

+    private static String doDecodeId(BytesRef encoded) { // decodes through a randomly chosen Uid.decodeId entry point
+        // either copy the id bytes into an exact-size array and use the single-argument overload ...
+        if (randomBoolean()) {
+            return Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length));
+        } else { // ... or decode in place through the (bytes, offset, length) overload
+            if (randomBoolean()) { // optionally move the id into a larger array at a non-zero offset
+                BytesRef slicedCopy = new BytesRef(randomIntBetween(encoded.length + 1, encoded.length + 100));
+                slicedCopy.offset = randomIntBetween(1, slicedCopy.bytes.length - encoded.length);
+                slicedCopy.length = encoded.length;
+                System.arraycopy(encoded.bytes, encoded.offset, slicedCopy.bytes, slicedCopy.offset, encoded.length);
+                assertArrayEquals(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length),
+                    Arrays.copyOfRange(slicedCopy.bytes, slicedCopy.offset, slicedCopy.offset + slicedCopy.length)); // sanity check of the copy
+                encoded = slicedCopy;
+            }
+            return Uid.decodeId(encoded.bytes, encoded.offset, encoded.length);
+        }
+    }
 }