Add an option to UnicodeUnescaper that enables support for the '\u+0047' syntax. By default this syntax remains unsupported, to match the way Java itself parses Unicode escapes. Requested in LANG-507.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@826370 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2dc5618257
commit
64c615346f
|
@ -26,6 +26,15 @@ import java.io.Writer;
|
||||||
*/
|
*/
|
||||||
public class UnicodeUnescaper extends CharSequenceTranslator {
|
public class UnicodeUnescaper extends CharSequenceTranslator {
|
||||||
|
|
||||||
|
private boolean escapingPlus = false;
|
||||||
|
|
||||||
|
/**
 * Sets whether a '+' immediately preceding the four hex digits of a
 * unicode escape (as in <code>\\u+0047</code>) is consumed during translation.
 * The backing field is initialised to <code>false</code>.
 *
 * @param b <code>true</code> to consume the '+' symbol, <code>false</code> to leave it in place
 */
public void setEscapingPlus(boolean b) {
|
||||||
|
this.escapingPlus = b;
|
||||||
|
}
|
||||||
|
/**
 * Reports the current value of the escapingPlus flag. The translation code
 * consults this before skipping a '+' symbol that precedes the hex digits.
 *
 * @return the value last passed to setEscapingPlus, or <code>false</code> if it was never called
 */
public boolean isEscapingPlus() {
|
||||||
|
return this.escapingPlus;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@inheritDoc}
|
* {@inheritDoc}
|
||||||
*/
|
*/
|
||||||
|
@ -39,6 +48,13 @@ public class UnicodeUnescaper extends CharSequenceTranslator {
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// consume + symbol in \\u+0045
|
||||||
|
if(isEscapingPlus()) {
|
||||||
|
if( (index + i < input.length()) && (input.charAt(index + i) == '+') ) {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if( (index + i + 4 <= input.length()) ) {
|
if( (index + i + 4 <= input.length()) ) {
|
||||||
// Get 4 hex digits
|
// Get 4 hex digits
|
||||||
CharSequence unicode = input.subSequence(index + i, index + i + 4);
|
CharSequence unicode = input.subSequence(index + i, index + i + 4);
|
||||||
|
@ -47,7 +63,7 @@ public class UnicodeUnescaper extends CharSequenceTranslator {
|
||||||
int value = Integer.parseInt(unicode.toString(), 16);
|
int value = Integer.parseInt(unicode.toString(), 16);
|
||||||
out.write((char) value);
|
out.write((char) value);
|
||||||
} catch (NumberFormatException nfe) {
|
} catch (NumberFormatException nfe) {
|
||||||
throw new RuntimeException("Unable to parse unicode value: " + unicode, nfe);
|
throw new IllegalArgumentException("Unable to parse unicode value: " + unicode, nfe);
|
||||||
}
|
}
|
||||||
return i + 4;
|
return i + 4;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -27,6 +27,22 @@ import java.io.IOException;
|
||||||
*/
|
*/
|
||||||
public class UnicodeUnescaperTest extends TestCase {
|
public class UnicodeUnescaperTest extends TestCase {
|
||||||
|
|
||||||
|
// Requested in LANG-507
|
||||||
|
/*
 * Translates the seven-character input \\u+0047 twice with the same instance:
 * first with the default settings, where an IllegalArgumentException is expected,
 * then after setEscapingPlus(true), where the result is expected to be "G".
 *
 * NOTE(review): the default-case expectation relies on the four characters "+004"
 * being rejected by Integer.parseInt(..., 16). Java 7 and later accept a leading '+'
 * sign, so on those runtimes the first translate call may succeed and reach fail(...)
 * - confirm against the target JDK.
 */
public void testUPlus() throws IOException {
|
||||||
|
UnicodeUnescaper uu = new UnicodeUnescaper();
|
||||||
|
|
||||||
|
String input = "\\u+0047";
|
||||||
|
try {
|
||||||
|
String result = uu.translate(input);
|
||||||
|
fail("Default behaviour should not parse u+");
|
||||||
|
} catch(IllegalArgumentException iae) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
|
||||||
|
uu.setEscapingPlus(true);
|
||||||
|
assertEquals("Failed to unescape unicode characters with 'u+' notation", "G", uu.translate(input));
|
||||||
|
}
|
||||||
|
|
||||||
public void testUuuuu() throws IOException {
|
public void testUuuuu() throws IOException {
|
||||||
UnicodeUnescaper uu = new UnicodeUnescaper();
|
UnicodeUnescaper uu = new UnicodeUnescaper();
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue