From 409f252b0ea6df2ad791c6ac615efdbe37dfa25b Mon Sep 17 00:00:00 2001 From: DOHA Date: Sat, 1 Sep 2018 21:57:20 +0300 Subject: [PATCH] remove emojis --- java-strings/pom.xml | 7 ++ .../RemovingEmojiFromStringUnitTest.java | 78 +++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 java-strings/src/test/java/com/baeldung/string/RemovingEmojiFromStringUnitTest.java diff --git a/java-strings/pom.xml b/java-strings/pom.xml index 86b8924c4b..0c83b4d9e7 100644 --- a/java-strings/pom.xml +++ b/java-strings/pom.xml @@ -52,6 +52,13 @@ icu4j ${icu4j.version} + + + com.vdurmont + emoji-java + 4.0.0 + + diff --git a/java-strings/src/test/java/com/baeldung/string/RemovingEmojiFromStringUnitTest.java b/java-strings/src/test/java/com/baeldung/string/RemovingEmojiFromStringUnitTest.java new file mode 100644 index 0000000000..163f28d0d8 --- /dev/null +++ b/java-strings/src/test/java/com/baeldung/string/RemovingEmojiFromStringUnitTest.java @@ -0,0 +1,78 @@ +package com.baeldung.string; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.not; +import static org.junit.Assert.assertThat; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.junit.Test; + +import com.vdurmont.emoji.EmojiParser; + +public class RemovingEmojiFromStringUnitTest { + String text = "la confรฉrence, commencera ร  10 heures ๐Ÿ˜… โœฟ"; + String regex = "[^\\p{L}\\p{N}\\p{P}\\p{Z}]"; + + @Test + public void whenRemoveEmojiUsingLibrary_thenSuccess() { + String result = EmojiParser.removeAllEmojis(text); + System.out.println(result); + assertThat(result, not(containsString("๐Ÿ˜…"))); + assertThat(result, containsString("ร ")); + assertThat(result, containsString("la")); + assertThat(result, containsString("10")); + } + + @Test + public void whenReplaceEmojiUsingLibrary_thenSuccess() { + String result = EmojiParser.parseToAliases(text); + System.out.println(result); + assertThat(result, not(containsString("๐Ÿ˜…"))); + assertThat(result, containsString("sweat_smile")); + } + + @Test + public void whenRemoveEmojiUsingRegex_thenSuccess() { + String result = text.replaceAll(regex, ""); + System.out.println(result); + assertThat(result, not(containsString("๐Ÿ˜…"))); + assertThat(result, containsString("ร ")); + assertThat(result, containsString("la")); + assertThat(result, containsString("10")); + } + + @Test + public void whenRemoveEmojiUsingMatcher_thenSuccess() { + Pattern pattern = Pattern.compile(regex, Pattern.UNICODE_CHARACTER_CLASS); + Matcher matcher = pattern.matcher(text); + + String result = matcher.replaceAll(""); + System.out.println(result); + assertThat(result, not(containsString("๐Ÿ˜…"))); + assertThat(result, containsString("ร ")); + assertThat(result, containsString("la")); + assertThat(result, containsString("10")); + } + + @Test + public void whenRemoveEmojiUsingCodepoints_thenSuccess() { + String result = text.replaceAll("[\\x{0001f300}-\\x{0001f64f}]|[\\x{0001f680}-\\x{0001f6ff}]", ""); + System.out.println(result); + assertThat(result, not(containsString("๐Ÿ˜…"))); + assertThat(result, containsString("ร ")); + assertThat(result, containsString("la")); + assertThat(result, containsString("10")); + } + + @Test + public void whenRemoveEmojiUsingUnicode_thenSuccess() { + String result = text.replaceAll("[\ud83c\udf00-\ud83d\ude4f]|[\ud83d\ude80-\ud83d\udeff]", ""); + System.out.println(result); + assertThat(result, not(containsString("๐Ÿ˜…"))); + assertThat(result, containsString("ร ")); + assertThat(result, containsString("la")); + assertThat(result, containsString("10")); + } +}