BAEL-6967-decode-unicode-in-string (#14901)
* BAEL-6967-decode-unicode-in-string * update unit test --------- Co-authored-by: tienvn <tienvn@>
This commit is contained in:
parent
e62aba145b
commit
3a816d6b3d
|
@ -28,12 +28,18 @@
|
|||
<artifactId>commons-vfs2</artifactId>
|
||||
<version>${commons-vfs2.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-text</artifactId>
|
||||
<version>${apache-commons-text.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<properties>
|
||||
<commons-compress.version>1.23.0</commons-compress.version>
|
||||
<ant.version>1.10.13</ant.version>
|
||||
<commons-vfs2.version>2.9.0</commons-vfs2.version>
|
||||
<apache-commons-text.version>1.10.0</apache-commons-text.version>
|
||||
</properties>
|
||||
|
||||
</project>
|
|
@ -0,0 +1,29 @@
|
|||
package com.baeldung.commons.convertunicode;
|
||||
|
||||
import org.apache.commons.text.StringEscapeUtils;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class UnicodeConverterUtil {
|
||||
|
||||
public static String decodeWithApacheCommons(String input) {
|
||||
return StringEscapeUtils.unescapeJava(input);
|
||||
}
|
||||
|
||||
public static String decodeWithPlainJava(String input) {
|
||||
Pattern pattern = Pattern.compile("\\\\u[0-9a-fA-F]{4}");
|
||||
Matcher matcher = pattern.matcher(input);
|
||||
|
||||
StringBuilder decodedString = new StringBuilder();
|
||||
|
||||
while (matcher.find()) {
|
||||
String unicodeSequence = matcher.group();
|
||||
char unicodeChar = (char) Integer.parseInt(unicodeSequence.substring(2), 16);
|
||||
matcher.appendReplacement(decodedString, Character.toString(unicodeChar));
|
||||
}
|
||||
|
||||
matcher.appendTail(decodedString);
|
||||
return decodedString.toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
package com.baeldung.commons.convertunicode;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class UnicodeConverterUnitTest {
|
||||
|
||||
@Test
|
||||
public void whenInputHaveUnicodeSequences_ThenDecode() {
|
||||
String encodedString = "\\u0048\\u0065\\u006C\\u006C\\u006F World";
|
||||
String expectedDecodedString = "Hello World";
|
||||
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithApacheCommons(encodedString));
|
||||
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithPlainJava(encodedString));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenInputHaveNoUnicodeSequences_ThenDoNothing() {
|
||||
String inputString = "Hello World";
|
||||
assertEquals(inputString, UnicodeConverterUtil.decodeWithApacheCommons(inputString));
|
||||
assertEquals(inputString, UnicodeConverterUtil.decodeWithPlainJava(inputString));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenInputHaveUnicodeSequencesInMiddle_ThenDecode() {
|
||||
String encodedString = "This is a test \\u0069\\u006E the middle.";
|
||||
String expectedDecodedString = "This is a test in the middle.";
|
||||
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithApacheCommons(encodedString));
|
||||
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithPlainJava(encodedString));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenInputHaveMultipleUnicodeSequences_ThenDecode() {
|
||||
String encodedString = "Unicode: \\u0048\\u0065\\u006C\\u006C\\u006F \\u0057\\u006F\\u0072\\u006C\\u0064";
|
||||
String expectedDecodedString = "Unicode: Hello World";
|
||||
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithApacheCommons(encodedString));
|
||||
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithPlainJava(encodedString));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue