Merge pull request #4034 from eclipse/jetty-9.4.x-4033-uriutil-equalsignoreencodings

Issue #4033 - Add Lenient percent decode in URIUtil
2019-08-29 07:21:34 -05:00 · 2019-08-29 07:21:34 -05:00 · 25071c3e99
parent 2b72f08f1b 2fcb311c56
commit 25071c3e99
4 changed files with 149 additions and 27 deletions
--- a/jetty-server/src/test/java/org/eclipse/jetty/server/HttpConnectionTest.java
+++ b/jetty-server/src/test/java/org/eclipse/jetty/server/HttpConnectionTest.java
@ -690,17 +690,12 @@ public class HttpConnectionTest
    @Test
    public void testBadURIencoding() throws Exception
    {
-        Log.getLogger(HttpParser.class).info("badMessage: bad encoding expected ...");
+        // The URI is being leniently decoded, leaving the "%x" alone
-        String response;
+        String response = connector.getResponse("GET /bad/encoding%x HTTP/1.1\r\n" +
-
+            "Host: localhost\r\n" +
-        try (StacklessLogging stackless = new StacklessLogging(HttpParser.class))
+            "Connection: close\r\n" +
-        {
+            "\r\n");
-            response = connector.getResponse("GET /bad/encoding%1 HTTP/1.1\r\n" +
+        checkContains(response, 0, "HTTP/1.1 200");
                "Host: localhost\r\n" +
                "Connection: close\r\n" +
                "\r\n");
            checkContains(response, 0, "HTTP/1.1 400");
        }
    }
    @Test
--- a/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java
+++ b/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java
@ -644,6 +644,26 @@ public class StringUtil
        return __UTF8.equalsIgnoreCase(charset) || __UTF8.equalsIgnoreCase(normalizeCharset(charset));
    }
    /**
     * Test whether a region of a String consists solely of hexadecimal digits
     * ({@code 0-9}, {@code a-f}, {@code A-F}).
     *
     * @param str the string to inspect
     * @param offset the index of the first character of the region
     * @param length the number of characters in the region
     * @return true if the region lies entirely within {@code str} and every character in it
     * is a hexadecimal digit (an in-bounds region of length 0 is trivially true);
     * false if the region is not fully inside the string (including a negative
     * {@code offset} or {@code length}) or contains a non-hex character
     */
    public static boolean isHex(String str, int offset, int length)
    {
        // Reject any region that is not fully inside the string.
        // The comparison is written as a subtraction because "offset + length" can overflow
        // int for large offsets, which would wrongly pass the bounds check, skip the loop
        // and report true.
        if (offset < 0 || length < 0 || offset > str.length() - length)
        {
            return false;
        }

        // Safe from overflow: the guard above ensures offset + length <= str.length()
        int end = offset + length;
        for (int i = offset; i < end; i++)
        {
            char c = str.charAt(i);
            boolean hex = ((c >= '0') && (c <= '9')) ||
                ((c >= 'a') && (c <= 'f')) ||
                ((c >= 'A') && (c <= 'F'));
            if (!hex)
            {
                return false;
            }
        }
        return true;
    }
    public static String printable(String name)
    {
        if (name == null)
--- a/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java
+++ b/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java
@ -470,24 +470,70 @@ public class URIUtil
                            builder = new Utf8StringBuilder(path.length());
                            builder.append(path, offset, i - offset);
                        }
-                        if ((i + 2) < end)
+
                        // lenient percent decoding
                        if (i >= end)
                        {
-                            char u = path.charAt(i + 1);
+                            // [LENIENT] a percent sign at end of string.
-                            if (u == 'u')
+                            builder.append('%');
                            i = end;
                        }
                        else if (end > (i + 1))
                        {
                            char type = path.charAt(i + 1);
                            if (type == 'u')
                            {
-                                // TODO this is wrong. This is a codepoint not a char
+                                // We have a possible (deprecated) microsoft unicode code point "%u####"
-                                builder.append((char)(0xffff & TypeUtil.parseInt(path, i + 2, 4, 16)));
+                                // - not recommended to use as it's limited to 2 bytes.
-                                i += 5;
+                                if ((i + 5) >= end)
                                {
                                    // [LENIENT] we have a partial "%u####" at the end of a string.
                                    builder.append(path, i, (end - i));
                                    i = end;
                                }
                                else
                                {
                                    // this seems wrong, as we are casting to a char, but that's the known
                                    // limitation of this deprecated encoding (only 2 bytes allowed)
                                    if (StringUtil.isHex(path, i + 2, 4))
                                    {
                                        builder.append((char)(0xffff & TypeUtil.parseInt(path, i + 2, 4, 16)));
                                        i += 5;
                                    }
                                    else
                                    {
                                        // [LENIENT] copy the "%u" as-is.
                                        builder.append(path, i, 2);
                                        i += 1;
                                    }
                                }
                            }
                            else if (end > (i + 2))
                            {
                                // we have a possible "%##" encoding
                                if (StringUtil.isHex(path, i + 1, 2))
                                {
                                    builder.append((byte)TypeUtil.parseInt(path, i + 1, 2, 16));
                                    i += 2;
                                }
                                else
                                {
                                    builder.append(path, i, 3);
                                    i += 2;
                                }
                            }
                            else
                            {
-                                builder.append((byte)(0xff & (TypeUtil.convertHexDigit(u) * 16 + TypeUtil.convertHexDigit(path.charAt(i + 2)))));
+                                // [LENIENT] incomplete "%##" sequence at end of string
-                                i += 2;
+                                builder.append(path, i, (end - i));
                                i = end;
                            }
                        }
                        else
                        {
-                            throw new IllegalArgumentException("Bad URI % encoding");
+                            // [LENIENT] the "%" at the end of the string
                            builder.append(path, i, (end - i));
                            i = end;
                        }
                        break;
@ -1156,22 +1202,58 @@ public class URIUtil
            int oa = uriA.charAt(a++);
            int ca = oa;
            if (ca == '%')
-                ca = TypeUtil.convertHexDigit(uriA.charAt(a++)) * 16 + TypeUtil.convertHexDigit(uriA.charAt(a++));
+            {
                ca = lenientPercentDecode(uriA, a);
                if (ca == (-1))
                {
                    ca = '%';
                }
                else
                {
                    a += 2;
                }
            }
            int ob = uriB.charAt(b++);
            int cb = ob;
            if (cb == '%')
-                cb = TypeUtil.convertHexDigit(uriB.charAt(b++)) * 16 + TypeUtil.convertHexDigit(uriB.charAt(b++));
+            {
                cb = lenientPercentDecode(uriB, b);
                if (cb == (-1))
                {
                    cb = '%';
                }
                else
                {
                    b += 2;
                }
            }
            // Don't match on encoded slash
            if (ca == '/' && oa != ob)
                return false;
            if (ca != cb)
-                return URIUtil.decodePath(uriA).equals(URIUtil.decodePath(uriB));
+                return false;
        }
        return a == lenA && b == lenB;
    }
    /**
     * Leniently decode the two characters at {@code offset} as a pair of hex digits.
     *
     * @param str the string holding the candidate hex digits
     * @param offset the index of the first candidate digit (callers pass the position
     * immediately following a {@code '%'})
     * @return the result of parsing the two characters in base 16, or {@code -1} if
     * {@code offset} is at or past the end of the string or the two characters are not
     * both hex digits
     */
    private static int lenientPercentDecode(String str, int offset)
    {
        // Short-circuit keeps the hex check from running once offset is past the end.
        boolean decodable = (offset < str.length()) && StringUtil.isHex(str, offset, 2);
        return decodable ? TypeUtil.parseInt(str, offset, 2, 16) : -1;
    }
    public static boolean equalsIgnoreEncodings(URI uriA, URI uriB)
    {
        if (uriA.equals(uriB))
--- a/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java
+++ b/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java
@ -90,9 +90,22 @@ public class URIUtilTest
        // Test for null character (real world ugly test case)
        byte[] oddBytes = {'/', 0x00, '/'};
        String odd = new String(oddBytes, StandardCharsets.ISO_8859_1);
        assertEquals(odd, URIUtil.decodePath("/%00/"));
        arguments.add(Arguments.of("/%00/", odd));
        // Deprecated Microsoft Percent-U encoding
        arguments.add(Arguments.of("abc%u3040", "abc\u3040"));
        // Lenient decode
        arguments.add(Arguments.of("abc%xyz", "abc%xyz")); // not a "%##"
        arguments.add(Arguments.of("abc%", "abc%")); // percent at end of string
        arguments.add(Arguments.of("abc%A", "abc%A")); // incomplete "%##" at end of string
        arguments.add(Arguments.of("abc%uvwxyz", "abc%uvwxyz")); // not a valid "%u####"
        arguments.add(Arguments.of("abc%uEFGHIJ", "abc%uEFGHIJ")); // not a valid "%u####"
        arguments.add(Arguments.of("abc%uABC", "abc%uABC")); // incomplete "%u####"
        arguments.add(Arguments.of("abc%uAB", "abc%uAB")); // incomplete "%u####"
        arguments.add(Arguments.of("abc%uA", "abc%uA")); // incomplete "%u####"
        arguments.add(Arguments.of("abc%u", "abc%u")); // incomplete "%u####"
        return arguments.stream();
    }
@ -344,7 +357,13 @@ public class URIUtilTest
            Arguments.of("/b rry%27s", "/b%20rry%27s"),
            Arguments.of("/foo%2fbar", "/foo%2fbar"),
-            Arguments.of("/foo%2fbar", "/foo%2Fbar")
+            Arguments.of("/foo%2fbar", "/foo%2Fbar"),
            // encoded vs not-encoded ("%" symbol is encoded as "%25")
            Arguments.of("/abc%25xyz", "/abc%xyz"),
            Arguments.of("/abc%25xy", "/abc%xy"),
            Arguments.of("/abc%25x", "/abc%x"),
            Arguments.of("/zzz%25", "/zzz%")
        );
    }
@ -358,11 +377,17 @@ public class URIUtilTest
    public static Stream<Arguments> equalsIgnoreEncodingStringFalseSource()
    {
        return Stream.of(
            // case difference
            Arguments.of("ABC", "abc"),
            // Encoding difference ("'" is "%27")
            Arguments.of("/barry's", "/barry%26s"),
-
+            // Never match on "%2f" differences
            Arguments.of("/foo/bar", "/foo%2fbar"),
-            Arguments.of("/foo2fbar", "/foo/bar")
+            // not actually encoded
            Arguments.of("/foo2fbar", "/foo/bar"),
            // encoded vs not-encoded ("%" symbol is encoded as "%25")
            Arguments.of("/yyy%25zzz", "/aaa%xxx"),
            Arguments.of("/zzz%25", "/aaa%")
        );
    }