SEC-1377: Extended HTML escaping functionality to take account of control characters, whitespace and to handle Unicode supplementary characters (surrogate pairs).

This commit is contained in:
Luke Taylor 2010-01-22 01:55:13 +00:00
parent dbf673ec37
commit 04447bdbf0
2 changed files with 68 additions and 10 deletions

View File

@ -1,9 +1,11 @@
package org.springframework.security.web.util;
/**
* Utility for escaping characters in HTML strings.
* Internal utility for escaping characters in HTML strings.
*
* @author Luke Taylor
*
* @see http://www.owasp.org/index.php/How_to_perform_HTML_entity_encoding_in_Java
*/
public abstract class TextEscapeUtils {
@ -17,22 +19,42 @@ public abstract class TextEscapeUtils {
for (int i=0; i < s.length(); i++) {
char c = s.charAt(i);
if(c == '<') {
if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9') {
sb.append(c);
} else if(c == '<') {
sb.append("&lt;");
} else if (c == '>') {
sb.append("&gt;");
} else if (c == '"') {
sb.append("&#034;");
} else if (c == '\'') {
sb.append("&#039;");
} else if (c == '&') {
sb.append("&amp;");
} else {
sb.append(c);
} else if (Character.isWhitespace(c)) {
sb.append("&#").append((int)c).append(";");
} else if (Character.isISOControl(c)) {
// ignore control chars
} else if (Character.isHighSurrogate(c)) {
if (i + 1 >= s.length()) {
// Unexpected end
throw new IllegalArgumentException("Missing low surrogate character at end of string");
}
char low = s.charAt(i + 1);
if (!Character.isLowSurrogate(low)) {
throw new IllegalArgumentException("Expected low surrogate character but found value = " + (int)low);
}
int codePoint = Character.toCodePoint(c, low);
if (Character.isDefined(codePoint)) {
sb.append("&#").append(codePoint).append(";");
}
i++; // skip the next character as we have already dealt with it
} else if (Character.isLowSurrogate(c)) {
throw new IllegalArgumentException("Unexpected low surrogate character, value = " + (int)c);
} else if (Character.isDefined(c)) {
sb.append("&#").append((int) c).append(";");
}
// Ignore anything else
}
return sb.toString();
}
}

View File

@ -7,9 +7,45 @@ import org.springframework.security.web.util.TextEscapeUtils;
public class TextEscapeUtilsTests {
/**
* &amp;, &lt;, &gt;, &#34;, &#39 and&#32;(space) escaping
*/
@Test
public void charactersAreEscapedCorrectly() {
assertEquals("a&lt;script&gt;&#034;&#039;", TextEscapeUtils.escapeEntities("a<script>\"'"));
assertEquals("&amp;&#32;a&lt;script&gt;&#34;&#39;", TextEscapeUtils.escapeEntities("& a<script>\"'"));
}
@Test
public void nullOrEmptyStringIsHandled() throws Exception {
assertEquals("", TextEscapeUtils.escapeEntities(""));
assertNull(TextEscapeUtils.escapeEntities(null));
}
@Test(expected=IllegalArgumentException.class)
public void invalidLowSurrogateIsDetected() throws Exception {
TextEscapeUtils.escapeEntities("abc\uDCCCdef");
}
@Test(expected=IllegalArgumentException.class)
public void missingLowSurrogateIsDetected() throws Exception {
TextEscapeUtils.escapeEntities("abc\uD888a");
}
@Test(expected=IllegalArgumentException.class)
public void highSurrogateAtEndOfStringIsRejected() throws Exception {
TextEscapeUtils.escapeEntities("abc\uD888");
}
/**
* Delta char: &#66560;
*/
@Test
public void validSurrogatePairIsAccepted() throws Exception {
assertEquals("abc&#66560;a", TextEscapeUtils.escapeEntities("abc\uD801\uDC00a"));
}
@Test
public void undefinedSurrogatePairIsIgnored() throws Exception {
assertEquals("abca", TextEscapeUtils.escapeEntities("abc\uD888\uDC00a"));
}
}