diff --git a/jetty-server/src/test/java/org/eclipse/jetty/server/HttpConnectionTest.java b/jetty-server/src/test/java/org/eclipse/jetty/server/HttpConnectionTest.java
index f2be892780c..408ebb1535a 100644
--- a/jetty-server/src/test/java/org/eclipse/jetty/server/HttpConnectionTest.java
+++ b/jetty-server/src/test/java/org/eclipse/jetty/server/HttpConnectionTest.java
@@ -690,17 +690,12 @@ public class HttpConnectionTest
     @Test
     public void testBadURIencoding() throws Exception
     {
-        Log.getLogger(HttpParser.class).info("badMessage: bad encoding expected ...");
-        String response;
-
-        try (StacklessLogging stackless = new StacklessLogging(HttpParser.class))
-        {
-            response = connector.getResponse("GET /bad/encoding%1 HTTP/1.1\r\n" +
-                "Host: localhost\r\n" +
-                "Connection: close\r\n" +
-                "\r\n");
-            checkContains(response, 0, "HTTP/1.1 400");
-        }
+        // The URI is being leniently decoded, leaving the "%x" alone
+        String response = connector.getResponse("GET /bad/encoding%x HTTP/1.1\r\n" +
+            "Host: localhost\r\n" +
+            "Connection: close\r\n" +
+            "\r\n");
+        checkContains(response, 0, "HTTP/1.1 200");
     }
 
     @Test
diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java b/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java
index 2c466a34b55..2b01724eb82 100644
--- a/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java
+++ b/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java
@@ -644,6 +644,39 @@ public class StringUtil
         return __UTF8.equalsIgnoreCase(charset) || __UTF8.equalsIgnoreCase(normalizeCharset(charset));
     }
 
+    /**
+     * Test whether a region of a string consists solely of hexadecimal digits.
+     *
+     * @param str the string to inspect, may be null
+     * @param offset the index of the first character to test
+     * @param length the number of characters to test
+     * @return true if the region lies entirely within the string and every character in it is
+     * one of 0-9, a-f or A-F (an empty region is trivially hex); false if the string is null,
+     * the region is out of bounds, or a non-hex character is found
+     */
+    public static boolean isHex(String str, int offset, int length)
+    {
+        // Reject null, negative arguments and regions that run past the end of the string.
+        // Written as a subtraction so that (offset + length) cannot overflow.
+        if (str == null || offset < 0 || length < 0 || length > (str.length() - offset))
+        {
+            return false;
+        }
+
+        int end = offset + length;
+        for (int i = offset; i < end; i++)
+        {
+            char c = str.charAt(i);
+            if (!(((c >= 'a') && (c <= 'f')) ||
+                ((c >= 'A') && (c <= 'F')) ||
+                ((c >= '0') && (c <= '9'))))
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
     public static String printable(String name)
     {
         if (name ==
null)
diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java b/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java
index b86a1b7958a..73a3e77e705 100644
--- a/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java
+++ b/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java
@@ -470,24 +470,70 @@ public class URIUtil
                             builder = new Utf8StringBuilder(path.length());
                             builder.append(path, offset, i - offset);
                         }
-                        if ((i + 2) < end)
+
+                        // lenient percent decoding
+                        if (i >= end)
                         {
-                            char u = path.charAt(i + 1);
-                            if (u == 'u')
+                            // [LENIENT] a percent sign at end of string.
+                            builder.append('%');
+                            i = end;
+                        }
+                        else if (end > (i + 1))
+                        {
+                            char type = path.charAt(i + 1);
+                            if (type == 'u')
                             {
-                                // TODO this is wrong. This is a codepoint not a char
-                                builder.append((char)(0xffff & TypeUtil.parseInt(path, i + 2, 4, 16)));
-                                i += 5;
+                                // We have a possible (deprecated) microsoft unicode code point "%u####"
+                                // - not recommended to use as it's limited to 2 bytes.
+                                if ((i + 5) >= end)
+                                {
+                                    // [LENIENT] we have a partial "%u####" at the end of a string.
+                                    builder.append(path, i, (end - i));
+                                    i = end;
+                                }
+                                else
+                                {
+                                    // this seems wrong, as we are casting to a char, but that's the known
+                                    // limitation of this deprecated encoding (only 2 bytes allowed)
+                                    if (StringUtil.isHex(path, i + 2, 4))
+                                    {
+                                        builder.append((char)(0xffff & TypeUtil.parseInt(path, i + 2, 4, 16)));
+                                        i += 5;
+                                    }
+                                    else
+                                    {
+                                        // [LENIENT] copy the "%u" as-is.
+                                        builder.append(path, i, 2);
+                                        i += 1;
+                                    }
+                                }
+                            }
+                            else if (end > (i + 2))
+                            {
+                                // we have a possible "%##" encoding
+                                if (StringUtil.isHex(path, i + 1, 2))
+                                {
+                                    builder.append((byte)TypeUtil.parseInt(path, i + 1, 2, 16));
+                                    i += 2;
+                                }
+                                else
+                                {
+                                    // [LENIENT] not a "%##": keep only the "%" so the loop still interprets what follows
+                                    builder.append('%');
+                                }
                             }
                             else
                             {
-                                builder.append((byte)(0xff & (TypeUtil.convertHexDigit(u) * 16 + TypeUtil.convertHexDigit(path.charAt(i + 2)))));
-                                i += 2;
+                                // [LENIENT] incomplete "%##" sequence at end of string
+                                builder.append(path, i, (end - i));
+                                i = end;
                             }
                         }
                         else
                         {
-                            throw new IllegalArgumentException("Bad URI % encoding");
+                            // [LENIENT] the "%" at the end of the string
+                            builder.append(path, i, (end - i));
+                            i = end;
                         }
                         break;
 
@@ -1156,22 +1202,66 @@ public class URIUtil
             int oa = uriA.charAt(a++);
             int ca = oa;
             if (ca == '%')
-                ca = TypeUtil.convertHexDigit(uriA.charAt(a++)) * 16 + TypeUtil.convertHexDigit(uriA.charAt(a++));
+            {
+                ca = lenientPercentDecode(uriA, a);
+                if (ca == (-1))
+                {
+                    ca = '%';
+                }
+                else
+                {
+                    a += 2;
+                }
+            }
 
             int ob = uriB.charAt(b++);
             int cb = ob;
             if (cb == '%')
-                cb = TypeUtil.convertHexDigit(uriB.charAt(b++)) * 16 + TypeUtil.convertHexDigit(uriB.charAt(b++));
+            {
+                cb = lenientPercentDecode(uriB, b);
+                if (cb == (-1))
+                {
+                    cb = '%';
+                }
+                else
+                {
+                    b += 2;
+                }
+            }
 
+            // Don't match on encoded slash
             if (ca == '/' && oa != ob)
                 return false;
 
             if (ca != cb)
-                return URIUtil.decodePath(uriA).equals(URIUtil.decodePath(uriB));
+                return false;
         }
         return a == lenA && b == lenB;
     }
 
+    /**
+     * Decode the two hex digits that may follow a {@code '%'}.
+     *
+     * @param str the string being examined
+     * @param offset the index of the first character after the {@code '%'}
+     * @return the value parsed from the two hex digits at {@code offset}, or -1 if
+     * {@code offset} is past the end or the next two characters are not both hex digits
+     */
+    private static int lenientPercentDecode(String str, int offset)
+    {
+        if (offset >= str.length())
+            return -1;
+
+        if (StringUtil.isHex(str, offset, 2))
+        {
+            return TypeUtil.parseInt(str, offset, 2, 16);
+        }
+        else
+        {
+            return -1;
+        }
+    }
+
     public static boolean equalsIgnoreEncodings(URI uriA, URI uriB)
     {
         if (uriA.equals(uriB))
diff --git a/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java b/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java
index
0ea76fcef5f..ce02b8019bb 100644
--- a/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java
+++ b/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java
@@ -90,9 +90,22 @@ public class URIUtilTest
         // Test for null character (real world ugly test case)
         byte[] oddBytes = {'/', 0x00, '/'};
         String odd = new String(oddBytes, StandardCharsets.ISO_8859_1);
-        assertEquals(odd, URIUtil.decodePath("/%00/"));
         arguments.add(Arguments.of("/%00/", odd));
 
+        // Deprecated Microsoft Percent-U encoding
+        arguments.add(Arguments.of("abc%u3040", "abc\u3040"));
+
+        // Lenient decode
+        arguments.add(Arguments.of("abc%xyz", "abc%xyz")); // not a "%##"
+        arguments.add(Arguments.of("abc%", "abc%")); // percent at end of string
+        arguments.add(Arguments.of("abc%A", "abc%A")); // incomplete "%##" at end of string
+        arguments.add(Arguments.of("abc%uvwxyz", "abc%uvwxyz")); // not a valid "%u####"
+        arguments.add(Arguments.of("abc%uEFGHIJ", "abc%uEFGHIJ")); // not a valid "%u####"
+        arguments.add(Arguments.of("abc%uABC", "abc%uABC")); // incomplete "%u####"
+        arguments.add(Arguments.of("abc%uAB", "abc%uAB")); // incomplete "%u####"
+        arguments.add(Arguments.of("abc%uA", "abc%uA")); // incomplete "%u####"
+        arguments.add(Arguments.of("abc%u", "abc%u")); // incomplete "%u####"
+
         return arguments.stream();
     }
 
@@ -344,7 +357,13 @@ public class URIUtilTest
             Arguments.of("/b rry%27s", "/b%20rry%27s"),
 
             Arguments.of("/foo%2fbar", "/foo%2fbar"),
-            Arguments.of("/foo%2fbar", "/foo%2Fbar")
+            Arguments.of("/foo%2fbar", "/foo%2Fbar"),
+
+            // encoded vs not-encoded ("%" symbol is encoded as "%25")
+            Arguments.of("/abc%25xyz", "/abc%xyz"),
+            Arguments.of("/abc%25xy", "/abc%xy"),
+            Arguments.of("/abc%25x", "/abc%x"),
+            Arguments.of("/zzz%25", "/zzz%")
         );
     }
 
@@ -358,11 +377,17 @@ public class URIUtilTest
     public static Stream equalsIgnoreEncodingStringFalseSource()
     {
         return Stream.of(
+            // case difference (these pairs are expected to compare as NOT equal)
             Arguments.of("ABC", "abc"),
+            // Encoding difference ("'" is "%27", not "%26")
             Arguments.of("/barry's", "/barry%26s"),
-
+            // Never match on "%2f" differences
             Arguments.of("/foo/bar", "/foo%2fbar"),
-            Arguments.of("/foo2fbar", "/foo/bar")
+            // not actually encoded ("2f" without a leading "%" is literal text)
+            Arguments.of("/foo2fbar", "/foo/bar"),
+            // encoded vs not-encoded ("%" symbol is encoded as "%25")
+            Arguments.of("/yyy%25zzz", "/aaa%xxx"),
+            Arguments.of("/zzz%25", "/aaa%")
         );
     }
 