Adding tests and resolving LANG-710, reported by Benjamin Valentin. Note that the behaviour has changed: the code will now unescape an unfinished entity, i.e. one with no terminating semi-colon (e.g. &#030). This matches browser behaviour.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1142389 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Henri Yandell 2011-07-03 07:55:33 +00:00
parent 23a71e792b
commit 8914d7f617
2 changed files with 33 additions and 7 deletions

View File

@ -20,9 +20,11 @@
import java.io.Writer;
/**
* Translate XML numeric entities of the form &#[xX]?\d+; to
* Translate XML numeric entities of the form &#[xX]?\d+;? to
* the specific codepoint.
*
* Note that the semi-colon is optional.
*
* @since 3.0
* @version $Id$
*/
@ -33,7 +35,9 @@ public class NumericEntityUnescaper extends CharSequenceTranslator {
*/
@Override
public int translate(CharSequence input, int index, Writer out) throws IOException {
if(input.charAt(index) == '&' && index < (input.length() - 1) && input.charAt(index + 1) == '#') {
int seqEnd = input.length();
// Uses -2 to ensure there is something after the &#
if(input.charAt(index) == '&' && index < seqEnd - 2 && input.charAt(index + 1) == '#') {
int start = index + 2;
boolean isHex = false;
@ -41,10 +45,19 @@ public int translate(CharSequence input, int index, Writer out) throws IOExcepti
if(firstChar == 'x' || firstChar == 'X') {
start++;
isHex = true;
// Check there's more than just an x after the &#
if(start == seqEnd) {
return 0;
}
}
int end = start;
while(input.charAt(end) != ';') {
// Note that this supports character codes without a ; on the end
while(end < seqEnd && ( (input.charAt(end) >= '0' && input.charAt(end) <= '9') ||
(input.charAt(end) >= 'a' && input.charAt(end) <= 'f') ||
(input.charAt(end) >= 'A' && input.charAt(end) <= 'F') ) )
{
end++;
}
@ -56,6 +69,7 @@ public int translate(CharSequence input, int index, Writer out) throws IOExcepti
entityValue = Integer.parseInt(input.subSequence(start, end).toString(), 10);
}
} catch(NumberFormatException nfe) {
System.err.println("FAIL: " + input.subSequence(start, end) + "[" + start +"]["+ end +"]");
return 0;
}
@ -66,7 +80,10 @@ public int translate(CharSequence input, int index, Writer out) throws IOExcepti
} else {
out.write(entityValue);
}
return 2 + (end - start) + (isHex ? 1 : 0) + 1;
boolean semiNext = (end != seqEnd) && (input.charAt(end) == ';');
return 2 + (end - start) + (isHex ? 1 : 0) + (semiNext ? 1 : 0);
}
return 0;
}

View File

@ -36,11 +36,20 @@ public void testSupplementaryUnescaping() {
/**
 * Verifies that input ending in a truncated entity prefix is returned unchanged.
 * Covers a lone ampersand, an ampersand followed by a hash, and the hex
 * prefixes (lower- and upper-case x) with no digits after them.
 */
public void testOutOfBounds() {
    NumericEntityUnescaper neu = new NumericEntityUnescaper();

    // Each message names the exact trailing sequence so a failure points at the right case.
    assertEquals("Failed to ignore when last character is &", "Test &", neu.translate("Test &"));
    assertEquals("Failed to ignore when last characters are &#", "Test &#", neu.translate("Test &#"));
    assertEquals("Failed to ignore when last characters are &#x", "Test &#x", neu.translate("Test &#x"));
    assertEquals("Failed to ignore when last characters are &#X", "Test &#X", neu.translate("Test &#X"));
}
/**
 * Verifies that a numeric entity without its terminating semi-colon is still
 * unescaped, and that the text following the entity is left as it was.
 */
public void testUnfinishedEntity() {
    NumericEntityUnescaper neu = new NumericEntityUnescaper();
    String input = "Test &#x30 not test";
    String expected = "Test \u0030 not test";

    String result = neu.translate(input);

    // A single assertion: the former duplicate carried a message belonging to testOutOfBounds.
    assertEquals("Failed to support unfinished entities (i.e. missing semi-colon)", expected, result);
}
}