mirror of
https://github.com/apache/commons-lang.git
synced 2025-02-09 11:34:55 +00:00
Adding tests and resolving LANG-710, reported by Benjamin Valentin. Note that this changes the behaviour such that the code will now unescape an unfinished entity (i.e. one missing its trailing semi-colon, such as &#x30). This matches browser behaviour.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1142389 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
23a71e792b
commit
8914d7f617
@ -20,9 +20,11 @@
|
|||||||
import java.io.Writer;
|
import java.io.Writer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Translate XML numeric entities of the form &#[xX]?\d+; to
|
* Translate XML numeric entities of the form &#[xX]?\d+;? to
|
||||||
* the specific codepoint.
|
* the specific codepoint.
|
||||||
*
|
*
|
||||||
|
* Note that the semi-colon is optional.
|
||||||
|
*
|
||||||
* @since 3.0
|
* @since 3.0
|
||||||
* @version $Id$
|
* @version $Id$
|
||||||
*/
|
*/
|
||||||
@ -33,7 +35,9 @@ public class NumericEntityUnescaper extends CharSequenceTranslator {
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int translate(CharSequence input, int index, Writer out) throws IOException {
|
public int translate(CharSequence input, int index, Writer out) throws IOException {
|
||||||
if(input.charAt(index) == '&' && index < (input.length() - 1) && input.charAt(index + 1) == '#') {
|
int seqEnd = input.length();
|
||||||
|
// Uses -2 to ensure there is something after the &#
|
||||||
|
if(input.charAt(index) == '&' && index < seqEnd - 2 && input.charAt(index + 1) == '#') {
|
||||||
int start = index + 2;
|
int start = index + 2;
|
||||||
boolean isHex = false;
|
boolean isHex = false;
|
||||||
|
|
||||||
@ -41,10 +45,19 @@ public int translate(CharSequence input, int index, Writer out) throws IOExcepti
|
|||||||
if(firstChar == 'x' || firstChar == 'X') {
|
if(firstChar == 'x' || firstChar == 'X') {
|
||||||
start++;
|
start++;
|
||||||
isHex = true;
|
isHex = true;
|
||||||
|
|
||||||
|
// Check there's more than just an x after the &#
|
||||||
|
if(start == seqEnd) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int end = start;
|
int end = start;
|
||||||
while(input.charAt(end) != ';') {
|
// Note that this supports character codes without a ; on the end
|
||||||
|
while(end < seqEnd && ( (input.charAt(end) >= '0' && input.charAt(end) <= '9') ||
|
||||||
|
(input.charAt(end) >= 'a' && input.charAt(end) <= 'f') ||
|
||||||
|
(input.charAt(end) >= 'A' && input.charAt(end) <= 'F') ) )
|
||||||
|
{
|
||||||
end++;
|
end++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -56,6 +69,7 @@ public int translate(CharSequence input, int index, Writer out) throws IOExcepti
|
|||||||
entityValue = Integer.parseInt(input.subSequence(start, end).toString(), 10);
|
entityValue = Integer.parseInt(input.subSequence(start, end).toString(), 10);
|
||||||
}
|
}
|
||||||
} catch(NumberFormatException nfe) {
|
} catch(NumberFormatException nfe) {
|
||||||
|
System.err.println("FAIL: " + input.subSequence(start, end) + "[" + start +"]["+ end +"]");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -66,7 +80,10 @@ public int translate(CharSequence input, int index, Writer out) throws IOExcepti
|
|||||||
} else {
|
} else {
|
||||||
out.write(entityValue);
|
out.write(entityValue);
|
||||||
}
|
}
|
||||||
return 2 + (end - start) + (isHex ? 1 : 0) + 1;
|
|
||||||
|
boolean semiNext = (end != seqEnd) && (input.charAt(end) == ';');
|
||||||
|
|
||||||
|
return 2 + (end - start) + (isHex ? 1 : 0) + (semiNext ? 1 : 0);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -36,11 +36,20 @@ public void testSupplementaryUnescaping() {
|
|||||||
|
|
||||||
public void testOutOfBounds() {
|
public void testOutOfBounds() {
|
||||||
NumericEntityUnescaper neu = new NumericEntityUnescaper();
|
NumericEntityUnescaper neu = new NumericEntityUnescaper();
|
||||||
String input = "Test &";
|
|
||||||
String expected = input;
|
assertEquals("Failed to ignore when last character is &", "Test &", neu.translate("Test &"));
|
||||||
|
assertEquals("Failed to ignore when last character is &", "Test &#", neu.translate("Test &#"));
|
||||||
|
assertEquals("Failed to ignore when last character is &", "Test &#x", neu.translate("Test &#x"));
|
||||||
|
assertEquals("Failed to ignore when last character is &", "Test &#X", neu.translate("Test &#X"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testUnfinishedEntity() {
|
||||||
|
NumericEntityUnescaper neu = new NumericEntityUnescaper();
|
||||||
|
String input = "Test &#x30 not test";
|
||||||
|
String expected = "Test \u0030 not test";
|
||||||
|
|
||||||
String result = neu.translate(input);
|
String result = neu.translate(input);
|
||||||
assertEquals("Failed to ignore when last character is &", expected, result);
|
assertEquals("Failed to support unfinished entities (i.e. missing semi-colon", expected, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user