tighten the isUrlEncoded check

Ideally we shouldn't need this function at all and should instead
never double-encode strings, but auditing for that is beyond what I
have time for. Currently, putBlob(" ") and putBlob("%20") behave the
same way, which is arguably incorrect.
This commit is contained in:
Ka-Hing Cheung 2015-05-27 16:12:11 -07:00 committed by Andrew Gaul
parent 1103a778d2
commit 82ab88d589
2 changed files with 29 additions and 3 deletions

View File

@ -91,10 +91,32 @@ public class Strings2 {
return CIDR_PATTERN.matcher(in).matches();
}
// Loose legacy check: matches any input that contains at least one '%'
// followed by two hexadecimal digits. isUrlEncoded no longer consults it;
// it is retained here in case other code in this class still references it.
private static final Pattern URL_ENCODED_PATTERN = Pattern.compile(".*%[a-fA-F0-9][a-fA-F0-9].*");

// taken from https://docs.oracle.com/javase/7/docs/api/java/net/URI.html#legal-chars
private static final Pattern URL_VALID_PATTERN = Pattern.compile("[a-zA-Z0-9_\\-!.~'()*,;:$&+=?/\\[\\]@%]+");

/**
 * Tells whether {@code ch} is an ASCII hexadecimal digit
 * ({@code 0-9}, {@code a-f} or {@code A-F}).
 */
private static boolean isHexadecimal(char ch) {
   return (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f') || (ch >= '0' && ch <= '9');
}

/**
 * Heuristically decides whether {@code in} is already URL-encoded.
 *
 * <p>The input is considered encoded when it is non-empty, consists solely
 * of the characters accepted by {@code URL_VALID_PATTERN}, and every
 * {@code '%'} in it is immediately followed by two hexadecimal digits.
 * Consequently a string without any {@code '%'} that uses only legal
 * characters is also reported as encoded, while a string containing e.g. a
 * raw space is not, even if it also contains valid escapes.
 *
 * @param in the string to inspect; must not be {@code null}
 * @return {@code true} if {@code in} looks URL-encoded, {@code false} otherwise
 */
public static boolean isUrlEncoded(String in) {
   if (!URL_VALID_PATTERN.matcher(in).matches()) {
      return false;
   }

   // ensure that all % are followed by 2 hexadecimal characters
   int percentIdx = 0;
   while ((percentIdx = in.indexOf('%', percentIdx)) != -1) {
      // a '%' needs two more characters after it to form a complete escape
      if (percentIdx + 2 >= in.length()) {
         return false;
      }
      if (!isHexadecimal(in.charAt(percentIdx + 1))
            || !isHexadecimal(in.charAt(percentIdx + 2))) {
         return false;
      }
      // resume the search just past this "%XX" escape
      percentIdx += 3;
   }

   return true;
}
/**

View File

@ -30,6 +30,10 @@ public class Strings2Test {
public void testIsEncoded() {
   // Inputs and the verdict expected for each, paired by index.
   String[] inputs = {
      "/read-tests/%73%6f%6d%65%20%66%69%6c%65",   // only legal characters and complete escapes
      "/read-tests/ tep",                          // raw space
      "/read-tests/dealde%2Fl04 011e%204c8df",     // raw space next to otherwise valid escapes
      "/read-tests/%/",                            // '%' not followed by hex digits
      "/read-tests/%ZZ",                           // '%' followed by non-hex characters
      "/read-tests/%20",                           // escape at the very end of the string
   };
   boolean[] expected = {true, false, false, false, false, true};

   for (int i = 0; i < inputs.length; i++) {
      assert Strings2.isUrlEncoded(inputs[i]) == expected[i];
   }
}
public void testNoDoubleEncode() {