Making unescapeHtml _NOT_ unescape unfinished numeric entities by default (it ignores them); however, adding options that will either throw an exception or unescape the numeric entity. LANG-710

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1143641 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Henri Yandell 2011-07-07 03:44:22 +00:00
parent 6be2343397
commit 9642e11aac
2 changed files with 70 additions and 4 deletions

View File

@ -18,6 +18,8 @@ package org.apache.commons.lang3.text.translate;
import java.io.IOException;
import java.io.Writer;
import java.util.Arrays;
import java.util.EnumSet;
/**
* Translate XML numeric entities of the form &#[xX]?\d+;? to
@ -30,6 +32,41 @@ import java.io.Writer;
*/
public class NumericEntityUnescaper extends CharSequenceTranslator {
/**
 * Options controlling how a numeric entity that lacks its trailing ';' is handled.
 */
public enum OPTION {
    /** An entity without a trailing ';' is not translated; the input is left as it is. */
    semiColonRequired,
    /** An entity without a trailing ';' is still unescaped. */
    semiColonOptional,
    /** An entity without a trailing ';' causes a RuntimeException to be thrown. */
    errorIfNoSemiColon
}
// TODO?: Create an OptionsSet class to hide some of the conditional logic below
private final EnumSet<OPTION> options;
/**
 * Create a NumericEntityUnescaper.
 *
 * The constructor takes a list of options that control how a numeric entity
 * without a semi-colon on the end is handled. If no options are passed
 * (or the options array is {@code null}), OPTION.semiColonRequired is used.
 *
 * For example, to support numeric entities without a ';':
 *    new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.semiColonOptional)
 *
 * @param options to apply to this unescaper
 */
public NumericEntityUnescaper(OPTION... options) {
    // A caller may pass an explicit null array through the varargs parameter;
    // treat that the same as passing no options at all.
    if(options != null && options.length > 0) {
        this.options = EnumSet.copyOf(Arrays.asList(options));
    } else {
        // Default: the trailing semi-colon is required.
        this.options = EnumSet.of(OPTION.semiColonRequired);
    }
}
/**
 * Reports whether the given option is among the options held by this unescaper.
 *
 * @param option the option to look up
 * @return {@code true} if the option is set; {@code false} if it is not,
 *         or if there is no option set at all
 */
public boolean isSet(OPTION option) {
    if(options == null) {
        return false;
    }
    return options.contains(option);
}
/**
* {@inheritDoc}
*/
@ -61,6 +98,17 @@ public class NumericEntityUnescaper extends CharSequenceTranslator {
end++;
}
boolean semiNext = (end != seqEnd) && (input.charAt(end) == ';');
if(!semiNext) {
if(isSet(OPTION.semiColonRequired)) {
return 0;
} else
if(isSet(OPTION.errorIfNoSemiColon)) {
throw new RuntimeException("Semi-colon required at end of numeric entity");
}
}
int entityValue;
try {
if(isHex) {
@ -80,8 +128,6 @@ public class NumericEntityUnescaper extends CharSequenceTranslator {
out.write(entityValue);
}
boolean semiNext = (end != seqEnd) && (input.charAt(end) == ';');
return 2 + (end - start) + (isHex ? 1 : 0) + (semiNext ? 1 : 0);
}
return 0;

View File

@ -44,12 +44,32 @@ public class NumericEntityUnescaperTest extends TestCase {
}
/**
 * Exercises the three ways an entity without a closing semi-colon can be handled:
 * unescaped (semiColonOptional), left untouched (the default), or rejected with
 * a RuntimeException (errorIfNoSemiColon).
 */
public void testUnfinishedEntity() {
    // parse it
    NumericEntityUnescaper neu = new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.semiColonOptional);
    String input = "Test &#x30 not test";
    String expected = "Test \u0030 not test";

    String result = neu.translate(input);
    assertEquals("Failed to support unfinished entities (i.e. missing semi-colon)", expected, result);

    // ignore it
    neu = new NumericEntityUnescaper();
    input = "Test &#x30 not test";
    expected = input;

    result = neu.translate(input);
    assertEquals("Failed to ignore unfinished entities (i.e. missing semi-colon)", expected, result);

    // fail it
    neu = new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.errorIfNoSemiColon);
    input = "Test &#x30 not test";

    try {
        result = neu.translate(input);
        fail("RuntimeException expected");
    } catch(RuntimeException re) {
        // expected
    }
}
}