From a0b798c27a0312df2a110073bd7888993e56d05c Mon Sep 17 00:00:00 2001
From: kaching88
Date: Tue, 14 Jul 2015 01:54:35 +0200
Subject: [PATCH] =?UTF-8?q?LANG-1120:=20StringUtils.stripAccents=20should?=
 =?UTF-8?q?=20remove=20accents=20from=20"=C5=81"=20and=20"=C5=82"=20(close?=
 =?UTF-8?q?s=20#105).?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../org/apache/commons/lang3/StringUtils.java     | 15 ++++++++++++++-
 .../commons/lang3/StringUtilsTrimEmptyTest.java   |  2 ++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/main/java/org/apache/commons/lang3/StringUtils.java b/src/main/java/org/apache/commons/lang3/StringUtils.java
index f4ec00bf6..31a572a54 100644
--- a/src/main/java/org/apache/commons/lang3/StringUtils.java
+++ b/src/main/java/org/apache/commons/lang3/StringUtils.java
@@ -753,11 +753,24 @@ public static String stripAccents(final String input) {
             return null;
         }
         final Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");//$NON-NLS-1$
-        final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD);
+        final StringBuilder decomposed = new StringBuilder(Normalizer.normalize(input, Normalizer.Form.NFD));
+        convertRemainingAccentCharacters(decomposed);
         // Note that this doesn't correctly remove ligatures...
         return pattern.matcher(decomposed).replaceAll(StringUtils.EMPTY);
     }
 
+    /** Maps the stroked letters U+0141 and U+0142, which survive NFD decomposition, to plain L and l, in place. */
+    private static void convertRemainingAccentCharacters(final StringBuilder decomposed) {
+        for (int i = 0; i < decomposed.length(); i++) {
+            final char current = decomposed.charAt(i);
+            if (current == '\u0141') {
+                decomposed.setCharAt(i, 'L');
+            } else if (current == '\u0142') {
+                decomposed.setCharAt(i, 'l');
+            }
+        }
+    }
+
     // Equals
     //-----------------------------------------------------------------------
     /**
diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java
index 70895b918..f55b28fb8 100644
--- a/src/test/java/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java
+++ b/src/test/java/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java
@@ -273,5 +273,7 @@ public void testStripAccents() {
         assertEquals( "Failed empty String", "", StringUtils.stripAccents("") );
         assertEquals( "Failed to handle non-accented text", "control", StringUtils.stripAccents("control") );
         assertEquals( "Failed to handle easy example", "eclair", StringUtils.stripAccents("\u00E9clair") );
+        assertEquals("ALOSZZCN aloszzcn", StringUtils.stripAccents("\u0104\u0141\u00D3\u015A\u017B\u0179\u0106\u0143 "
+                + "\u0105\u0142\u00F3\u015B\u017C\u017A\u0107\u0144"));
     }
 }