From 7da57151ed31bab515a312e81249743a69ada90c Mon Sep 17 00:00:00 2001 From: Joakim Erdfelt Date: Wed, 28 Aug 2019 12:08:18 -0500 Subject: [PATCH 1/3] Issue #4033 - lenient percent decode in URIUtil + Allows for preserving decoded Strings like "X%YZ" Signed-off-by: Joakim Erdfelt --- .../org/eclipse/jetty/util/StringUtil.java | 15 +++ .../java/org/eclipse/jetty/util/URIUtil.java | 107 ++++++++++++++++-- .../org/eclipse/jetty/util/URIUtilTest.java | 31 ++++- 3 files changed, 138 insertions(+), 15 deletions(-) diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java b/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java index 2c466a34b55..fbc96e38c1d 100644 --- a/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java +++ b/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java @@ -644,6 +644,21 @@ public class StringUtil return __UTF8.equalsIgnoreCase(charset) || __UTF8.equalsIgnoreCase(normalizeCharset(charset)); } + public static boolean isHex(String str, int offset, int length) + { + for (int i = offset; i < (offset + length); i++) + { + char c = str.charAt(i); + if (!(((c >= 'a') && (c <= 'f')) || + ((c >= 'A') && (c <= 'F')) || + ((c >= '0') && (c <= '9')))) + { + return false; + } + } + return true; + } + public static String printable(String name) { if (name == null) diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java b/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java index b86a1b7958a..9e9b2c536dd 100644 --- a/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java +++ b/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java @@ -470,24 +470,72 @@ public class URIUtil builder = new Utf8StringBuilder(path.length()); builder.append(path, offset, i - offset); } - if ((i + 2) < end) + + // lenient percent decoding + if (i >= end) { - char u = path.charAt(i + 1); - if (u == 'u') + // [LENIENT] a percent sign at end of string. + builder.append('%'); + i = end; + } + else if (end > (i + 1)) + { + char type = path.charAt(i + 1); + if (type == 'u') { - // TODO this is wrong. This is a codepoint not a char - builder.append((char)(0xffff & TypeUtil.parseInt(path, i + 2, 4, 16))); - i += 5; + // We have a possible (deprecated) microsoft unicode code point "%u####" + // - not recommended to use as it's limited to 2 bytes. + if ((i + 5) >= end) + { + // [LENIENT] we have a partial "%u####" at the end of a string. + builder.append(path, i, (end - i)); + i = end; + } + else + { + // this seems wrong, as we are casting to a char, but that's the known + // limitation of this deprecated encoding (only 2 bytes allowed) + if (StringUtil.isHex(path, i + 2, 4)) + { + builder.append((char)(0xffff & TypeUtil.parseInt(path, i + 2, 4, 16))); + i += 5; + } + else + { + // [LENIENT] copy the "%u" as-is. + builder.append(path, i, 2); + i += 1; + } + } + } + else if (end > (i + 2)) + { + // we have a possible "%##" encoding + if (StringUtil.isHex(path, i + 1, 2)) + { + char c1 = path.charAt(i + 1); + char c2 = path.charAt(i + 2); + builder.append((byte)(0xff & (TypeUtil.convertHexDigit(c1) * 16 + TypeUtil.convertHexDigit(c2)))); + i += 2; + } + else + { + builder.append(path, i, 3); + i += 2; + } } else { - builder.append((byte)(0xff & (TypeUtil.convertHexDigit(u) * 16 + TypeUtil.convertHexDigit(path.charAt(i + 2))))); - i += 2; + // [LENIENT] incomplete "%##" sequence at end of string + builder.append(path, i, (end - i)); + i = end; } } else { - throw new IllegalArgumentException("Bad URI % encoding"); + // [LENIENT] the "%" at the end of the string + builder.append(path, i, (end - i)); + i = end; } break; @@ -1156,12 +1204,32 @@ public class URIUtil int oa = uriA.charAt(a++); int ca = oa; if (ca == '%') - ca = TypeUtil.convertHexDigit(uriA.charAt(a++)) * 16 + TypeUtil.convertHexDigit(uriA.charAt(a++)); + { + ca = lenientPercentDecode(uriA, a); + if (ca == (-1)) + { + ca = '%'; + } + else + { + a += 2; + } + } int ob = uriB.charAt(b++); int cb = ob; if (cb == '%') - cb = TypeUtil.convertHexDigit(uriB.charAt(b++)) * 16 + TypeUtil.convertHexDigit(uriB.charAt(b++)); + { + cb = lenientPercentDecode(uriB, b); + if (cb == (-1)) + { + cb = '%'; + } + else + { + b += 2; + } + } if (ca == '/' && oa != ob) return false; @@ -1172,6 +1240,23 @@ public class URIUtil return a == lenA && b == lenB; } + private static int lenientPercentDecode(String str, int offset) + { + if (offset >= str.length()) + return -1; + + char a1 = str.charAt(offset); + char a2 = str.charAt(offset + 1); + try + { + return TypeUtil.convertHexDigit(a1) * 16 + TypeUtil.convertHexDigit(a2); + } + catch (NumberFormatException e) + { + return -1; + } + } + public static boolean equalsIgnoreEncodings(URI uriA, URI uriB) { if (uriA.equals(uriB)) diff --git a/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java b/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java index 0ea76fcef5f..7e7a1aa628a 100644 --- a/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java +++ b/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java @@ -90,9 +90,22 @@ public class URIUtilTest // Test for null character (real world ugly test case) byte[] oddBytes = {'/', 0x00, '/'}; String odd = new String(oddBytes, StandardCharsets.ISO_8859_1); - assertEquals(odd, URIUtil.decodePath("/%00/")); arguments.add(Arguments.of("/%00/", odd)); + // Deprecated Microsoft Percent-U encoding + arguments.add(Arguments.of("abc%u3040", "abc\u3040")); + + // Lenient decode + arguments.add(Arguments.of("abc%xyz", "abc%xyz")); // not a "%##" + arguments.add(Arguments.of("abc%", "abc%")); // percent at end of string + arguments.add(Arguments.of("abc%A", "abc%A")); // incomplete "%##" at end of string + arguments.add(Arguments.of("abc%uvwxyz", "abc%uvwxyz")); // not a valid "%u####" + arguments.add(Arguments.of("abc%uEFGHIJ", "abc%uEFGHIJ")); // not a valid "%u####" + arguments.add(Arguments.of("abc%uABC", "abc%uABC")); // incomplete "%u####" + arguments.add(Arguments.of("abc%uAB", "abc%uAB")); // incomplete "%u####" + arguments.add(Arguments.of("abc%uA", "abc%uA")); // incomplete "%u####" + arguments.add(Arguments.of("abc%u", "abc%u")); // incomplete "%u####" + return arguments.stream(); } @@ -344,7 +357,11 @@ public class URIUtilTest Arguments.of("/b rry%27s", "/b%20rry%27s"), Arguments.of("/foo%2fbar", "/foo%2fbar"), - Arguments.of("/foo%2fbar", "/foo%2Fbar") + Arguments.of("/foo%2fbar", "/foo%2Fbar"), + + // encoded vs not-encode ("%" symbol is encoded as "%25") + Arguments.of("/abc%25xyz", "/abc%xyz"), + Arguments.of("/zzz%25", "/zzz%") ); } @@ -358,11 +375,17 @@ public class URIUtilTest public static Stream equalsIgnoreEncodingStringFalseSource() { return Stream.of( + // case difference Arguments.of("ABC", "abc"), + // Encoding difference ("'" is "%27") Arguments.of("/barry's", "/barry%26s"), - + // Never match on "%2f" differences Arguments.of("/foo/bar", "/foo%2fbar"), - Arguments.of("/foo2fbar", "/foo/bar") + // not actually encoded + Arguments.of("/foo2fbar", "/foo/bar"), + // encoded vs not-encode ("%" symbol is encoded as "%25") + Arguments.of("/yyy%25zzz", "/aaa%xxx"), + Arguments.of("/zzz%25", "/aaa%") ); } From f47115c5856921728954cf5c24de106753ee0a0c Mon Sep 17 00:00:00 2001 From: Joakim Erdfelt Date: Wed, 28 Aug 2019 12:20:52 -0500 Subject: [PATCH 2/3] Issue #4033 - More tests for Lenient URIUtil behavior Signed-off-by: Joakim Erdfelt --- .../main/java/org/eclipse/jetty/util/StringUtil.java | 5 +++++ .../main/java/org/eclipse/jetty/util/URIUtil.java | 12 ++++-------- .../java/org/eclipse/jetty/util/URIUtilTest.java | 2 ++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java b/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java index fbc96e38c1d..2b01724eb82 100644 --- a/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java +++ b/jetty-util/src/main/java/org/eclipse/jetty/util/StringUtil.java @@ -646,6 +646,11 @@ public class StringUtil public static boolean isHex(String str, int offset, int length) { + if (offset + length > str.length()) + { + return false; + } + for (int i = offset; i < (offset + length); i++) { char c = str.charAt(i); diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java b/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java index 9e9b2c536dd..63c4b081162 100644 --- a/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java +++ b/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java @@ -513,9 +513,7 @@ public class URIUtil // we have a possible "%##" encoding if (StringUtil.isHex(path, i + 1, 2)) { - char c1 = path.charAt(i + 1); - char c2 = path.charAt(i + 2); - builder.append((byte)(0xff & (TypeUtil.convertHexDigit(c1) * 16 + TypeUtil.convertHexDigit(c2)))); + builder.append((byte)TypeUtil.parseInt(path, i + 1, 2, 16)); i += 2; } else @@ -1245,13 +1243,11 @@ public class URIUtil if (offset >= str.length()) return -1; - char a1 = str.charAt(offset); - char a2 = str.charAt(offset + 1); - try + if (StringUtil.isHex(str, offset, 2)) { - return TypeUtil.convertHexDigit(a1) * 16 + TypeUtil.convertHexDigit(a2); + return TypeUtil.parseInt(str, offset, 2, 16); } - catch (NumberFormatException e) + else { return -1; } diff --git a/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java b/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java index 7e7a1aa628a..ce02b8019bb 100644 --- a/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java +++ b/jetty-util/src/test/java/org/eclipse/jetty/util/URIUtilTest.java @@ -361,6 +361,8 @@ public class URIUtilTest // encoded vs not-encode ("%" symbol is encoded as "%25") Arguments.of("/abc%25xyz", "/abc%xyz"), + Arguments.of("/abc%25xy", "/abc%xy"), + Arguments.of("/abc%25x", "/abc%x"), Arguments.of("/zzz%25", "/zzz%") ); } From 2fcb311c568e4f288d2cfdbd191027e7e37b98b2 Mon Sep 17 00:00:00 2001 From: Joakim Erdfelt Date: Wed, 28 Aug 2019 16:32:19 -0500 Subject: [PATCH 3/3] Issue #4033 - Addressing Lenient URIUtil decode behavior change in test Signed-off-by: Joakim Erdfelt --- .../jetty/server/HttpConnectionTest.java | 17 ++++++----------- .../java/org/eclipse/jetty/util/URIUtil.java | 3 ++- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/jetty-server/src/test/java/org/eclipse/jetty/server/HttpConnectionTest.java b/jetty-server/src/test/java/org/eclipse/jetty/server/HttpConnectionTest.java index f2be892780c..408ebb1535a 100644 --- a/jetty-server/src/test/java/org/eclipse/jetty/server/HttpConnectionTest.java +++ b/jetty-server/src/test/java/org/eclipse/jetty/server/HttpConnectionTest.java @@ -690,17 +690,12 @@ public class HttpConnectionTest @Test public void testBadURIencoding() throws Exception { - Log.getLogger(HttpParser.class).info("badMessage: bad encoding expected ..."); - String response; - - try (StacklessLogging stackless = new StacklessLogging(HttpParser.class)) - { - response = connector.getResponse("GET /bad/encoding%1 HTTP/1.1\r\n" + - "Host: localhost\r\n" + - "Connection: close\r\n" + - "\r\n"); - checkContains(response, 0, "HTTP/1.1 400"); - } + // The URI is being leniently decoded, leaving the "%x" alone + String response = connector.getResponse("GET /bad/encoding%x HTTP/1.1\r\n" + + "Host: localhost\r\n" + + "Connection: close\r\n" + + "\r\n"); + checkContains(response, 0, "HTTP/1.1 200"); } @Test diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java b/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java index 63c4b081162..73a3e77e705 100644 --- a/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java +++ b/jetty-util/src/main/java/org/eclipse/jetty/util/URIUtil.java @@ -1229,11 +1229,12 @@ public class URIUtil } } + // Don't match on encoded slash if (ca == '/' && oa != ob) return false; if (ca != cb) - return URIUtil.decodePath(uriA).equals(URIUtil.decodePath(uriB)); + return false; } return a == lenA && b == lenB; }