Make URIUtil.equalsIgnoreEncodings(String,String) work with bi-directional comparison

This commit is contained in:
Joakim Erdfelt 2022-08-16 10:08:09 -05:00
parent ded11be439
commit 8147ece3ec
No known key found for this signature in database
GPG Key ID: 2D0E1FB8FE4B68B4
6 changed files with 266 additions and 193 deletions

View File

@ -498,6 +498,26 @@ public class TypeUtil
return d;
}
/**
 * Test whether a region of a string consists solely of hexadecimal digits
 * ({@code 0-9}, {@code a-f}, {@code A-F}).
 *
 * @param str the string to inspect, may be null
 * @param offset the index of the first character of the region
 * @param len the number of characters in the region
 * @return true if every character of the region is a hexadecimal digit;
 * false if {@code str} is null, if {@code offset} or {@code len} is negative,
 * if the region extends past the end of the string, or if any character in the
 * region is not a hexadecimal digit. A zero-length region within bounds yields true.
 */
public static boolean isHex(String str, int offset, int len)
{
    if (str == null)
        return false;
    // Reject negative bounds and regions that run past the end of the string.
    // The comparison is written as a subtraction so that large offset/len values
    // cannot overflow int and slip past the check.
    if (offset < 0 || len < 0 || offset > str.length() - len)
        return false;
    int end = offset + len;
    for (int i = offset; i < end; i++)
    {
        char c = str.charAt(i);
        boolean hexDigit = (c >= '0' && c <= '9') ||
            (c >= 'a' && c <= 'f') ||
            (c >= 'A' && c <= 'F');
        if (!hexDigit)
            return false;
    }
    return true;
}
public static void toHex(byte b, Appendable buf)
{
try

View File

@ -630,8 +630,11 @@ public final class URIUtil
if (u == 'u')
{
// UTF16 encoding is only supported with UriCompliance.Violation.UTF16_ENCODINGS.
// This is wrong. This is a codepoint not a char
builder.append((char)(0xffff & TypeUtil.parseInt(path, i + 2, 4, 16)));
int[] codePoints = {(0xffff & TypeUtil.parseInt(path, i + 2, 4, 16))};
String str = new String(codePoints, 0, 1);
byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
for (byte b: bytes)
builder.append(b);
i += 5;
}
else
@ -719,6 +722,12 @@ public final class URIUtil
if (isSafe(code))
return true;
encodeCodepoint(code, builder);
return false;
}
private static void encodeCodepoint(int code, Utf8StringBuilder builder)
{
// Code point is 7-bit, simple encode
if (code <= 0x7F)
{
@ -737,7 +746,6 @@ public final class URIUtil
appendHexValue(builder, b);
}
}
return false;
}
private static void appendHexValue(Utf8StringBuilder builder, byte value)
@ -1579,66 +1587,151 @@ public final class URIUtil
// Only URIUtil is using this method
static boolean equalsIgnoreEncodings(String uriA, String uriB)
{
int lenA = uriA.length();
int lenB = uriB.length();
int a = 0;
int b = 0;
while (a < lenA && b < lenB)
try
{
int oa = uriA.charAt(a++);
int ca = oa;
if (ca == '%')
{
ca = lenientPercentDecode(uriA, a);
if (ca == (-1))
{
ca = '%';
}
else
{
a += 2;
}
}
int ob = uriB.charAt(b++);
int cb = ob;
if (cb == '%')
{
cb = lenientPercentDecode(uriB, b);
if (cb == (-1))
{
cb = '%';
}
else
{
b += 2;
}
}
// Don't match on encoded slash
if (ca == '/' && oa != ob)
return false;
if (ca != cb)
return false;
String safeDecodedUriA = ensureSafeEncoding(uriA);
String safeDecodedUriB = ensureSafeEncoding(uriB);
return safeDecodedUriA.equals(safeDecodedUriB);
}
catch (IllegalArgumentException e)
{
return false;
}
return a == lenA && b == lenB;
}
private static int lenientPercentDecode(String str, int offset)
static String ensureSafeEncoding(String path)
{
if (offset >= str.length())
return -1;
if (path == null)
return null;
if ("".equals(path) || "/".equals(path))
return path;
if (StringUtil.isHex(str, offset, 2))
int offset = 0;
int length = path.length();
try
{
return TypeUtil.parseInt(str, offset, 2, 16);
Utf8StringBuilder builder = null;
int end = offset + length;
for (int i = offset; i < end; i++)
{
char c = path.charAt(i);
if (c == '%')
{
if (builder == null)
{
builder = new Utf8StringBuilder(path.length());
builder.append(path, offset, i - offset);
}
if ((i + 2) < end)
{
char u = path.charAt(i + 1);
if (u == 'u')
{
if (TypeUtil.isHex(path, i + 2, 4))
{
// Always decode percent-u encoding to UTF-8
int codepoint = (0xffff & TypeUtil.parseInt(path, i + 2, 4, 16));
encodeCodepoint(codepoint, builder);
i += 5;
}
else
{
// not valid percent-u, encode the percent symbol
builder.append("%25");
}
}
else
{
if (TypeUtil.isHex(path, i + 1, 2))
{
// valid Hex, attempt to decode it
byte b = (byte)(0xff & (TypeUtil.convertHexDigit(u) * 16 + TypeUtil.convertHexDigit(path.charAt(i + 2))));
if (mustBeEncoded(b) || b == 0x2F)
{
// unsafe, keep encoding
encodeCodepoint(b, builder);
}
else
{
// safe to decode
builder.append(b);
}
i += 2;
}
else
{
// not valid percent encoding, encode the percent symbol
builder.append("%25");
}
}
}
else
{
// incomplete percent encoding, encode the percent symbol
builder.append("%25");
}
}
else
{
if (mustBeEncoded(c))
{
if (builder == null)
{
builder = new Utf8StringBuilder(path.length());
builder.append(path, offset, i - offset);
}
encodeCodepoint(c, builder);
}
else
{
if (builder != null)
builder.append(c);
}
}
}
if (builder != null)
return builder.toString();
return path;
}
else
catch (IllegalArgumentException e)
{
return -1;
throw e;
}
catch (Exception e)
{
throw new IllegalArgumentException("cannot decode URI", e);
}
}
/**
 * Decide whether a codepoint has to be percent-encoded when it appears in a URI.
 *
 * @param codepoint the codepoint to check
 * @return true if the codepoint must be encoded, false otherwise
 */
private static boolean mustBeEncoded(int codepoint)
{
    // anything below 0x20 (control characters), DEL (0x7F), and anything
    // above the 7-bit range must always be encoded
    if (codepoint < 0x20 || codepoint >= 0x7F)
        return true;

    switch (codepoint)
    {
        // space: rejected by java.net.URI
        case ' ':
        // unsafe characters
        case '"':
        case '<':
        case '>':
        case '%':
        case '{':
        case '}':
        case '|':
        case '\\':
        case '^':
        case '`':
            return true;
        default:
            return false;
    }
}
public static boolean equalsIgnoreEncodings(URI uriA, URI uriB)
@ -1680,16 +1773,15 @@ public final class URIUtil
* Add a sub path to an existing URI.
*
* @param uri A URI to add the path to
* @param path A decoded path element
* @param encodePath true to encode provided path, false to leave it alone in resulting URI
* @param path A safe path element
* @return URI with path added.
* @see #addPaths(String, String)
*/
public static URI addPath(URI uri, String path, boolean encodePath)
public static URI addPath(URI uri, String path)
{
Objects.requireNonNull(uri, "URI");
if (path == null)
if (path == null || "".equals(path))
return uri;
// collapse any "//" paths in the path portion
@ -1712,10 +1804,7 @@ public final class URIUtil
// collapse any "//" paths in the path portion
int offset = path.charAt(0) == '/' ? 1 : 0;
if (encodePath)
encodePath(buf, path, offset);
else
buf.append(path, offset, pathLen);
buf.append(path, offset, pathLen);
return URI.create(buf.toString());
}

View File

@ -349,11 +349,14 @@ public class PathResource extends Resource
private static URI normalize(Path path)
{
String raw = URIUtil.correctFileURI(path.toUri()).toASCIIString();
URI fixedUri = URIUtil.correctFileURI(path.toUri());
String raw = fixedUri.toASCIIString();
if (Files.isDirectory(path) && !raw.endsWith("/"))
raw += '/';
return URI.create(raw);
{
return URI.create(raw + '/');
}
return fixedUri;
}
@Override

View File

@ -332,13 +332,13 @@ public abstract class Resource
// that like an absolute path.
while (subUriPath.startsWith(URIUtil.SLASH))
{
// TODO XXX this appears entirely unneccessary and inefficient. We already have utilities
// TODO XXX this appears entirely unnecessary and inefficient. We already have utilities
// to handle appending path strings with/without slashes.
subUriPath = subUriPath.substring(1);
}
URI uri = getURI();
URI resolvedUri = URIUtil.addPath(uri, subUriPath, false);
URI resolvedUri = URIUtil.addPath(uri, subUriPath);
return create(resolvedUri);
}

View File

@ -15,11 +15,15 @@ package org.eclipse.jetty.util;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.stream.Stream;
import org.eclipse.jetty.util.resource.Resource;
import org.eclipse.jetty.util.resource.ResourceFactory;
import org.hamcrest.Matchers;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
@ -96,6 +100,47 @@ public class TypeUtilTest
assertEquals("123456789ABCDEF0", b.toString());
}
/**
 * Supplies (input, offset, length) triples whose selected region is made up
 * entirely of hexadecimal digits.
 */
public static Stream<Arguments> isHexTrueSource()
{
    // each row: input string, region offset, region length
    Object[][] rows = {
        {"2A", 0, 2},
        {"2a", 0, 2},
        {"0x2F", 2, 2},
        {"0x2f", 2, 2},
        {"%25", 1, 2},
        {"%0d", 1, 2},
        {"%uC0AC", 2, 4},
        {"%uc0ac", 2, 4}
    };
    return Stream.of(rows).map(row -> Arguments.of(row[0], row[1], row[2]));
}
/**
 * Each case from {@code isHexTrueSource} must be reported as hex by {@code TypeUtil.isHex}.
 */
@ParameterizedTest
@MethodSource("isHexTrueSource")
public void testIsHexTrue(String input, int offset, int length)
{
    boolean hex = TypeUtil.isHex(input, offset, length);
    assertTrue(hex);
}
/**
 * Supplies (input, offset, length) triples whose selected region contains at
 * least one character that is not a hexadecimal digit.
 */
public static Stream<Arguments> isHexFalseSource()
{
    // each row: input string, region offset, region length
    Object[][] rows = {
        {"gg", 0, 2},
        {"GG", 0, 2},
        {"0xZZ", 2, 2},
        {"0xyz", 2, 2},
        {"%xy", 1, 2},
        {"%0z", 1, 2},
        {"%users", 2, 4}
    };
    return Stream.of(rows).map(row -> Arguments.of(row[0], row[1], row[2]));
}
/**
 * Each case from {@code isHexFalseSource} must be reported as not-hex by {@code TypeUtil.isHex}.
 */
@ParameterizedTest
@MethodSource("isHexFalseSource")
public void testIsHexFalse(String input, int offset, int length)
{
    boolean hex = TypeUtil.isHex(input, offset, length);
    assertFalse(hex);
}
@Test
public void testIsTrue()
{

View File

@ -404,126 +404,6 @@ public class URIUtilTest
assertEquals(expected, actual, String.format("%s+%s", path1, path2));
}
/**
 * Supplies (base URI, path to add, expected URI string) cases for adding a
 * decoded path to a URI.
 */
public static Stream<Arguments> uriAddPathDecodedSource()
{
    // each row: base URI, path to add (may be null), expected resulting URI
    String[][] rows = {
        {"file:///path/", null, "file:///path/"},
        {"file:///path/", "", "file:///path/"},
        {"file:///path/", "bbb", "file:///path/bbb"},
        {"file:///path/", "/", "file:///path/"},
        {"file:///path/", "/bbb", "file:///path/bbb"},

        {"file:///tmp/aaa", null, "file:///tmp/aaa"},
        {"file:///tmp/aaa", "", "file:///tmp/aaa"},
        {"file:///tmp/aaa", "bbb", "file:///tmp/aaa/bbb"},
        {"file:///tmp/aaa", "/", "file:///tmp/aaa/"},
        {"file:///tmp/aaa", "/bbb", "file:///tmp/aaa/bbb"},

        {"/", null, "/"},
        {"/", "", "/"},
        {"/", "bbb", "/bbb"},
        {"/", "/", "/"},
        {"/", "/bbb", "/bbb"},

        {"", null, ""},
        {"", "", ""},
        {"", "bbb", "bbb"},
        {"", "/", "/"},
        {"", "/bbb", "/bbb"},

        {"aaa/", null, "aaa/"},
        {"aaa/", "", "aaa/"},
        {"aaa/", "bbb", "aaa/bbb"},
        {"aaa/", "/", "aaa/"},
        {"aaa/", "/bbb", "aaa/bbb"},

        {";JS", null, ";JS"},
        {";JS", "", ";JS"},
        {";JS", "bbb", ";JS/bbb"},
        {";JS", "/", ";JS/"},
        {";JS", "/bbb", ";JS/bbb"},

        {"file:///path;JS", null, "file:///path;JS"},
        {"file:///path;JS", "", "file:///path;JS"},
        {"file:///path;JS", "bbb", "file:///path;JS/bbb"},
        {"file:///path;JS", "/", "file:///path;JS/"},
        {"file:///path;JS", "/bbb", "file:///path;JS/bbb"},

        {"?A=1", null, "?A=1"},
        {"?A=1", "", "?A=1"},
        {"?A=1", "bbb", "?A=1/bbb"},
        {"?A=1", "/", "?A=1/"},
        {"?A=1", "/bbb", "?A=1/bbb"},

        {"aaa?A=1", null, "aaa?A=1"},
        {"aaa?A=1", "", "aaa?A=1"},
        {"aaa?A=1", "bbb", "aaa?A=1/bbb"},
        {"aaa?A=1", "/", "aaa?A=1/"},
        {"aaa?A=1", "/bbb", "aaa?A=1/bbb"},

        {"aaa/?A=1", null, "aaa/?A=1"},
        {"aaa/?A=1", "", "aaa/?A=1"},
        {"aaa/?A=1", "bbb", "aaa/?A=1/bbb"},
        {"aaa/?A=1", "/", "aaa/?A=1/"},
        {"aaa/?A=1", "/bbb", "aaa/?A=1/bbb"},

        {"file:///path?A=1", null, "file:///path?A=1"},
        {"file:///path?A=1", "", "file:///path?A=1"},
        {"file:///path?A=1", "bbb", "file:///path?A=1/bbb"},
        {"file:///path?A=1", "/", "file:///path?A=1/"},
        {"file:///path?A=1", "/bbb", "file:///path?A=1/bbb"},

        // A ZipFS base URI
        {"jar:file:///path/foo.jar!/", null, "jar:file:///path/foo.jar!/"},
        {"jar:file:///path/foo.jar!/", "", "jar:file:///path/foo.jar!/"},
        {"jar:file:///path/foo.jar!/", "bbb", "jar:file:///path/foo.jar!/bbb"},
        {"jar:file:///path/foo.jar!/", "bbb/", "jar:file:///path/foo.jar!/bbb/"},
        {"jar:file:///path/foo.jar!/", "bãm", "jar:file:///path/foo.jar!/b%C3%A3m"},
        {"jar:file:///path/foo.jar!/", "bãm/", "jar:file:///path/foo.jar!/b%C3%A3m/"},
        {"jar:file:///path/foo.jar!/", "/", "jar:file:///path/foo.jar!/"},
        {"jar:file:///path/foo.jar!/", "/bbb", "jar:file:///path/foo.jar!/bbb"},
        {"jar:file:///path/foo.jar!/", "/bbb/", "jar:file:///path/foo.jar!/bbb/"},
        {"jar:file:///path/foo.jar!/", "/bãm", "jar:file:///path/foo.jar!/b%C3%A3m"},
        {"jar:file:///path/foo.jar!/", "/bãm/", "jar:file:///path/foo.jar!/b%C3%A3m/"}
    };

    return Stream.of(rows)
        .map(row -> Arguments.of(URI.create(row[0]), row[1], row[2]));
}
/**
 * Adds {@code path} to {@code baseUri} with path encoding enabled and checks
 * the ASCII form of the result against {@code expectedUri}.
 */
@ParameterizedTest(name = "[{index}] {0} + {1}")
@MethodSource("uriAddPathDecodedSource")
public void testUriAddPathDecoded(URI baseUri, String path, String expectedUri)
{
    URI combined = URIUtil.addPath(baseUri, path, true);
    String combinedAscii = combined.toASCIIString();
    assertThat(combinedAscii, is(expectedUri));
}
public static Stream<Arguments> uriAddPathEncodedSource()
{
List<Arguments> cases = new ArrayList<>();
@ -638,10 +518,33 @@ public class URIUtilTest
@MethodSource("uriAddPathEncodedSource")
public void testUriAddPathEncoded(URI baseUri, String path, String expectedUri)
{
URI actual = URIUtil.addPath(baseUri, path, false);
URI actual = URIUtil.addPath(baseUri, path);
assertThat(actual.toASCIIString(), is(expectedUri));
}
/**
 * Supplies (input path, expected safely-encoded path) pairs for
 * {@code URIUtil.ensureSafeEncoding}.
 */
public static Stream<Arguments> ensureSafeEncodingSource()
{
    // each row: input path, expected result
    String[][] rows = {
        {"/foo", "/foo"},
        {"/barry's", "/barry's"},
        // encode utf-8 unicode
        {"/bãm/", "/b%C3%A3m/"},
        {"/bä€ãm/", "/b%C3%A4%E2%82%AC%C3%A3m/"},
        // encode naked % to %25
        {"/abc%x", "/abc%25x"},
        // encoded characters to leave as-is
        {"/foo/%2F", "/foo/%2F"},
        {"/barry%27s", "/barry%27s"}
    };
    return Stream.of(rows).map(row -> Arguments.of(row[0], row[1]));
}
/**
 * Runs {@code URIUtil.ensureSafeEncoding} on {@code input} and checks the
 * result against {@code expected}.
 */
@ParameterizedTest
@MethodSource("ensureSafeEncodingSource")
public void testEnsureSafeEncoding(String input, String expected)
{
    String safeEncoded = URIUtil.ensureSafeEncoding(input);
    assertThat(safeEncoded, is(expected));
}
public static Stream<Arguments> compactPathSource()
{
return Stream.of(
@ -688,12 +591,9 @@ public class URIUtilTest
{
return Stream.of(
Arguments.of("http://example.com/foo/bar", "http://example.com/foo/bar"),
Arguments.of("/barry's", "/barry%27s"),
Arguments.of("/barry%27s", "/barry's"),
Arguments.of("/barry%27s", "/barry%27s"),
Arguments.of("/b rry's", "/b%20rry%27s"),
Arguments.of("/b rry%27s", "/b%20rry's"),
Arguments.of("/b rry%27s", "/b%20rry%27s"),
Arguments.of("/re bar", "/re%20bar"),
Arguments.of("/foo%2fbar", "/foo%2fbar"),
Arguments.of("/foo%2fbar", "/foo%2Fbar"),
@ -702,7 +602,10 @@ public class URIUtilTest
Arguments.of("/abc%25xyz", "/abc%xyz"),
Arguments.of("/abc%25xy", "/abc%xy"),
Arguments.of("/abc%25x", "/abc%x"),
Arguments.of("/zzz%25", "/zzz%")
Arguments.of("/zzz%25", "/zzz%"),
// unicode encoded vs not-encoded
Arguments.of("/path/to/bä€ãm/", "/path/to/b%C3%A4%E2%82%AC%C3%A3m/")
);
}
@ -720,14 +623,27 @@ public class URIUtilTest
Arguments.of("ABC", "abc"),
// Encoding difference ("'" is "%27")
Arguments.of("/barry's", "/barry%26s"),
// %27 is a reserved character and should not be compared
Arguments.of("/barry's", "/barry%27s"),
Arguments.of("/barry%27s", "/barry's"),
Arguments.of("/b rry's", "/b%20rry%27s"),
Arguments.of("/b rry%27s", "/b%20rry's"),
// Never match on "%2f" differences - only interested in filename / directory name differences
// This could be a directory called "foo" with a file called "bar" on the left, and just a file "foo%2fbar" on the right
Arguments.of("/foo/bar", "/foo%2fbar"),
// not actually encoded
Arguments.of("/foo2fbar", "/foo/bar"),
// path params
Arguments.of("/path;a=b/to;x=y/foo/", "/path/to/foo"),
// encoded vs not-encoded ("%" symbol is encoded as "%25")
Arguments.of("/yyy%25zzz", "/aaa%xxx"),
Arguments.of("/zzz%25", "/aaa%")
Arguments.of("/zzz%25", "/aaa%"),
// %2F then multi-byte unicode
Arguments.of("/path/to/bãm/", "/path%2Fto/b%C3%A3m/"),
// multi-byte unicode then %2F
Arguments.of("/path/bãm/or/bust", "/path/b%C3%A3m/or%2Fbust"),
// mix of %2F and multiple consecutive multi-byte unicode
Arguments.of("/path/to/bä€ãm/", "/path%2Fto/b%C3%A4%E2%82%AC%C3%A3m/")
);
}