SEC-1377: Extended HTML escaping functionality to take account of control characters and whitespace, and to handle Unicode supplementary characters (surrogate pairs).

This commit is contained in:
Luke Taylor 2010-01-22 01:55:13 +00:00
parent dbf673ec37
commit 04447bdbf0
2 changed files with 68 additions and 10 deletions

View File

@ -1,9 +1,11 @@
package org.springframework.security.web.util; package org.springframework.security.web.util;
/** /**
* Utility for escaping characters in HTML strings. * Internal utility for escaping characters in HTML strings.
* *
* @author Luke Taylor * @author Luke Taylor
*
* @see <a href="http://www.owasp.org/index.php/How_to_perform_HTML_entity_encoding_in_Java">OWASP: How to perform HTML entity encoding in Java</a>
*/ */
public abstract class TextEscapeUtils { public abstract class TextEscapeUtils {
@ -17,22 +19,42 @@ public abstract class TextEscapeUtils {
for (int i=0; i < s.length(); i++) { for (int i=0; i < s.length(); i++) {
char c = s.charAt(i); char c = s.charAt(i);
if(c == '<') { if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9') {
sb.append(c);
} else if(c == '<') {
sb.append("&lt;"); sb.append("&lt;");
} else if (c == '>') { } else if (c == '>') {
sb.append("&gt;"); sb.append("&gt;");
} else if (c == '"') {
sb.append("&#034;");
} else if (c == '\'') {
sb.append("&#039;");
} else if (c == '&') { } else if (c == '&') {
sb.append("&amp;"); sb.append("&amp;");
} else { } else if (Character.isWhitespace(c)) {
sb.append(c); sb.append("&#").append((int)c).append(";");
} else if (Character.isISOControl(c)) {
// ignore control chars
} else if (Character.isHighSurrogate(c)) {
if (i + 1 >= s.length()) {
// Unexpected end
throw new IllegalArgumentException("Missing low surrogate character at end of string");
}
char low = s.charAt(i + 1);
if (!Character.isLowSurrogate(low)) {
throw new IllegalArgumentException("Expected low surrogate character but found value = " + (int)low);
}
int codePoint = Character.toCodePoint(c, low);
if (Character.isDefined(codePoint)) {
sb.append("&#").append(codePoint).append(";");
}
i++; // skip the next character as we have already dealt with it
} else if (Character.isLowSurrogate(c)) {
throw new IllegalArgumentException("Unexpected low surrogate character, value = " + (int)c);
} else if (Character.isDefined(c)) {
sb.append("&#").append((int) c).append(";");
} }
// Ignore anything else
} }
return sb.toString(); return sb.toString();
} }
} }

View File

@ -7,9 +7,45 @@ import org.springframework.security.web.util.TextEscapeUtils;
public class TextEscapeUtilsTests { public class TextEscapeUtilsTests {
/**
* &amp;, &lt;, &gt;, &#34;, &#39; and &#32; (space) escaping
*/
@Test @Test
public void charactersAreEscapedCorrectly() { public void charactersAreEscapedCorrectly() {
assertEquals("a&lt;script&gt;&#034;&#039;", TextEscapeUtils.escapeEntities("a<script>\"'")); assertEquals("&amp;&#32;a&lt;script&gt;&#34;&#39;", TextEscapeUtils.escapeEntities("& a<script>\"'"));
} }
@Test
public void nullOrEmptyStringIsHandled() throws Exception {
assertEquals("", TextEscapeUtils.escapeEntities(""));
assertNull(TextEscapeUtils.escapeEntities(null));
}
@Test(expected=IllegalArgumentException.class)
public void invalidLowSurrogateIsDetected() throws Exception {
TextEscapeUtils.escapeEntities("abc\uDCCCdef");
}
@Test(expected=IllegalArgumentException.class)
public void missingLowSurrogateIsDetected() throws Exception {
TextEscapeUtils.escapeEntities("abc\uD888a");
}
@Test(expected=IllegalArgumentException.class)
public void highSurrogateAtEndOfStringIsRejected() throws Exception {
TextEscapeUtils.escapeEntities("abc\uD888");
}
/**
* Deseret capital letter long I (U+10400, a supplementary character encoded as the surrogate pair D801 DC00): &#66560;
*/
@Test
public void validSurrogatePairIsAccepted() throws Exception {
assertEquals("abc&#66560;a", TextEscapeUtils.escapeEntities("abc\uD801\uDC00a"));
}
@Test
public void undefinedSurrogatePairIsIgnored() throws Exception {
assertEquals("abca", TextEscapeUtils.escapeEntities("abc\uD888\uDC00a"));
}
} }