LANG-1120: StringUtils.stripAccents should remove accents from "Ł" and "ł" (closes #105).

This commit is contained in:
kaching88 2015-07-14 01:54:35 +02:00 committed by pascalschumacher
parent dd5a0e6e1e
commit a0b798c27a
2 changed files with 16 additions and 1 deletions

View File

@ -753,11 +753,24 @@ public static String stripAccents(final String input) {
return null; return null;
} }
final Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");//$NON-NLS-1$ final Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");//$NON-NLS-1$
final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD); final StringBuilder decomposed = new StringBuilder(Normalizer.normalize(input, Normalizer.Form.NFD));
convertRemainingAccentCharacters(decomposed);
// Note that this doesn't correctly remove ligatures... // Note that this doesn't correctly remove ligatures...
return pattern.matcher(decomposed).replaceAll(StringUtils.EMPTY); return pattern.matcher(decomposed).replaceAll(StringUtils.EMPTY);
} }
/**
 * Replaces, in place, the accented letters that Unicode NFD normalization
 * leaves untouched because they have no canonical decomposition.
 *
 * <p>Currently handled: U+0141 (capital L with stroke) becomes {@code 'L'}
 * and U+0142 (small l with stroke) becomes {@code 'l'}. Every other
 * character is left as is, and the length of the buffer never changes.</p>
 *
 * @param decomposed the buffer to modify in place; must not be {@code null}
 */
private static void convertRemainingAccentCharacters(final StringBuilder decomposed) {
    for (int i = 0; i < decomposed.length(); i++) {
        final char current = decomposed.charAt(i);
        // Overwrite in place: a delete followed by an insert would shift the
        // remainder of the buffer twice for every replaced character.
        if (current == '\u0141') {
            decomposed.setCharAt(i, 'L');
        } else if (current == '\u0142') {
            decomposed.setCharAt(i, 'l');
        }
    }
}
// Equals // Equals
//----------------------------------------------------------------------- //-----------------------------------------------------------------------
/** /**

View File

@ -273,5 +273,7 @@ public void testStripAccents() {
assertEquals( "Failed empty String", "", StringUtils.stripAccents("") ); assertEquals( "Failed empty String", "", StringUtils.stripAccents("") );
assertEquals( "Failed to handle non-accented text", "control", StringUtils.stripAccents("control") ); assertEquals( "Failed to handle non-accented text", "control", StringUtils.stripAccents("control") );
assertEquals( "Failed to handle easy example", "eclair", StringUtils.stripAccents("\u00E9clair") ); assertEquals( "Failed to handle easy example", "eclair", StringUtils.stripAccents("\u00E9clair") );
assertEquals("ALOSZZCN aloszzcn", StringUtils.stripAccents("\u0104\u0141\u00D3\u015A\u017B\u0179\u0106\u0143 "
+ "\u0105\u0142\u00F3\u015B\u017C\u017A\u0107\u0144"));
} }
} }