BAEL-6967-decode-unicode-in-string (#14901)
* BAEL-6967-decode-unicode-in-string * update unit test --------- Co-authored-by: tienvn <tienvn@>
This commit is contained in:
parent
e62aba145b
commit
3a816d6b3d
|
@ -28,12 +28,18 @@
|
||||||
<artifactId>commons-vfs2</artifactId>
|
<artifactId>commons-vfs2</artifactId>
|
||||||
<version>${commons-vfs2.version}</version>
|
<version>${commons-vfs2.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.commons</groupId>
|
||||||
|
<artifactId>commons-text</artifactId>
|
||||||
|
<version>${apache-commons-text.version}</version>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<commons-compress.version>1.23.0</commons-compress.version>
|
<commons-compress.version>1.23.0</commons-compress.version>
|
||||||
<ant.version>1.10.13</ant.version>
|
<ant.version>1.10.13</ant.version>
|
||||||
<commons-vfs2.version>2.9.0</commons-vfs2.version>
|
<commons-vfs2.version>2.9.0</commons-vfs2.version>
|
||||||
|
<apache-commons-text.version>1.10.0</apache-commons-text.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
</project>
|
</project>
|
|
@ -0,0 +1,29 @@
|
||||||
|
package com.baeldung.commons.convertunicode;
|
||||||
|
|
||||||
|
import org.apache.commons.text.StringEscapeUtils;
|
||||||
|
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
public class UnicodeConverterUtil {
|
||||||
|
|
||||||
|
public static String decodeWithApacheCommons(String input) {
|
||||||
|
return StringEscapeUtils.unescapeJava(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String decodeWithPlainJava(String input) {
|
||||||
|
Pattern pattern = Pattern.compile("\\\\u[0-9a-fA-F]{4}");
|
||||||
|
Matcher matcher = pattern.matcher(input);
|
||||||
|
|
||||||
|
StringBuilder decodedString = new StringBuilder();
|
||||||
|
|
||||||
|
while (matcher.find()) {
|
||||||
|
String unicodeSequence = matcher.group();
|
||||||
|
char unicodeChar = (char) Integer.parseInt(unicodeSequence.substring(2), 16);
|
||||||
|
matcher.appendReplacement(decodedString, Character.toString(unicodeChar));
|
||||||
|
}
|
||||||
|
|
||||||
|
matcher.appendTail(decodedString);
|
||||||
|
return decodedString.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,39 @@
|
||||||
|
package com.baeldung.commons.convertunicode;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
public class UnicodeConverterUnitTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void whenInputHaveUnicodeSequences_ThenDecode() {
|
||||||
|
String encodedString = "\\u0048\\u0065\\u006C\\u006C\\u006F World";
|
||||||
|
String expectedDecodedString = "Hello World";
|
||||||
|
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithApacheCommons(encodedString));
|
||||||
|
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithPlainJava(encodedString));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void whenInputHaveNoUnicodeSequences_ThenDoNothing() {
|
||||||
|
String inputString = "Hello World";
|
||||||
|
assertEquals(inputString, UnicodeConverterUtil.decodeWithApacheCommons(inputString));
|
||||||
|
assertEquals(inputString, UnicodeConverterUtil.decodeWithPlainJava(inputString));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void whenInputHaveUnicodeSequencesInMiddle_ThenDecode() {
|
||||||
|
String encodedString = "This is a test \\u0069\\u006E the middle.";
|
||||||
|
String expectedDecodedString = "This is a test in the middle.";
|
||||||
|
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithApacheCommons(encodedString));
|
||||||
|
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithPlainJava(encodedString));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void whenInputHaveMultipleUnicodeSequences_ThenDecode() {
|
||||||
|
String encodedString = "Unicode: \\u0048\\u0065\\u006C\\u006C\\u006F \\u0057\\u006F\\u0072\\u006C\\u0064";
|
||||||
|
String expectedDecodedString = "Unicode: Hello World";
|
||||||
|
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithApacheCommons(encodedString));
|
||||||
|
assertEquals(expectedDecodedString, UnicodeConverterUtil.decodeWithPlainJava(encodedString));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue