Add an option to UnicodeUnescaper that enables support for the '\u+0047' syntax. By default the syntax remains unsupported, matching how Java itself parses unicode escapes. Requested in LANG-507.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@826370 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Henri Yandell 2009-10-18 07:25:59 +00:00
parent 2dc5618257
commit 64c615346f
2 changed files with 33 additions and 1 deletions

View File

@ -26,6 +26,15 @@ import java.io.Writer;
*/ */
public class UnicodeUnescaper extends CharSequenceTranslator { public class UnicodeUnescaper extends CharSequenceTranslator {
/** When true, a '+' found directly before the four hex digits (as in \\u+0047) is consumed; off by default. */
private boolean escapingPlus = false;
/**
 * Switches handling of the 'u+' notation on or off.
 *
 * @param b {@code true} to have a '+' ahead of the hex digits consumed
 *          during translation, {@code false} to leave it in place
 */
public void setEscapingPlus(boolean b) {
    escapingPlus = b;
}
/**
 * Reports whether handling of the 'u+' notation is currently enabled.
 *
 * @return the current value of the escapingPlus flag
 */
public boolean isEscapingPlus() {
    return escapingPlus;
}
/** /**
* {@inheritDoc} * {@inheritDoc}
*/ */
@ -39,6 +48,13 @@ public class UnicodeUnescaper extends CharSequenceTranslator {
i++; i++;
} }
// consume + symbol in \\u+0045
if(isEscapingPlus()) {
if( (index + i < input.length()) && (input.charAt(index + i) == '+') ) {
i++;
}
}
if( (index + i + 4 <= input.length()) ) { if( (index + i + 4 <= input.length()) ) {
// Get 4 hex digits // Get 4 hex digits
CharSequence unicode = input.subSequence(index + i, index + i + 4); CharSequence unicode = input.subSequence(index + i, index + i + 4);
@ -47,7 +63,7 @@ public class UnicodeUnescaper extends CharSequenceTranslator {
int value = Integer.parseInt(unicode.toString(), 16); int value = Integer.parseInt(unicode.toString(), 16);
out.write((char) value); out.write((char) value);
} catch (NumberFormatException nfe) { } catch (NumberFormatException nfe) {
throw new RuntimeException("Unable to parse unicode value: " + unicode, nfe); throw new IllegalArgumentException("Unable to parse unicode value: " + unicode, nfe);
} }
return i + 4; return i + 4;
} else { } else {

View File

@ -27,6 +27,22 @@ import java.io.IOException;
*/ */
public class UnicodeUnescaperTest extends TestCase { public class UnicodeUnescaperTest extends TestCase {
/**
 * Exercises the 'u+' escape notation requested in LANG-507.
 * A freshly constructed unescaper is expected to reject the input with an
 * IllegalArgumentException; after escapingPlus is switched on, the same
 * input must translate to "G".
 *
 * NOTE(review): the rejection presumably stems from Integer.parseInt refusing
 * a leading '+'. Java 7 and later accept a leading '+', so confirm this
 * expectation still holds on the target JVM.
 *
 * @throws IOException declared because translation may perform I/O
 */
public void testUPlus() throws IOException {
    final UnicodeUnescaper unescaper = new UnicodeUnescaper();
    final String escaped = "\\u+0047";

    // Default configuration: record whether the translation was refused.
    boolean rejected = false;
    try {
        unescaper.translate(escaped);
    } catch (IllegalArgumentException expected) {
        rejected = true;
    }
    assertTrue("Default behaviour should not parse u+", rejected);

    // With the option enabled the '+' is skipped and 0047 decodes to 'G'.
    unescaper.setEscapingPlus(true);
    assertEquals("Failed to unescape unicode characters with 'u+' notation", "G", unescaper.translate(escaped));
}
public void testUuuuu() throws IOException { public void testUuuuu() throws IOException {
UnicodeUnescaper uu = new UnicodeUnescaper(); UnicodeUnescaper uu = new UnicodeUnescaper();