LANG-1120: StringUtils.stripAccents should remove accents from "Ł" and "ł" (closes #105).
This commit is contained in:
parent
dd5a0e6e1e
commit
a0b798c27a
|
@ -753,11 +753,24 @@ public static String stripAccents(final String input) {
|
|||
return null;
|
||||
}
|
||||
final Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");//$NON-NLS-1$
|
||||
final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD);
|
||||
final StringBuilder decomposed = new StringBuilder(Normalizer.normalize(input, Normalizer.Form.NFD));
|
||||
convertRemainingAccentCharacters(decomposed);
|
||||
// Note that this doesn't correctly remove ligatures...
|
||||
return pattern.matcher(decomposed).replaceAll(StringUtils.EMPTY);
|
||||
}
|
||||
|
||||
private static void convertRemainingAccentCharacters(StringBuilder decomposed) {
|
||||
for (int i = 0; i < decomposed.length(); i++) {
|
||||
if (decomposed.charAt(i) == '\u0141') {
|
||||
decomposed.deleteCharAt(i);
|
||||
decomposed.insert(i, 'L');
|
||||
} else if (decomposed.charAt(i) == '\u0142') {
|
||||
decomposed.deleteCharAt(i);
|
||||
decomposed.insert(i, 'l');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Equals
|
||||
//-----------------------------------------------------------------------
|
||||
/**
|
||||
|
|
|
@ -273,5 +273,7 @@ public void testStripAccents() {
|
|||
assertEquals( "Failed empty String", "", StringUtils.stripAccents("") );
|
||||
assertEquals( "Failed to handle non-accented text", "control", StringUtils.stripAccents("control") );
|
||||
assertEquals( "Failed to handle easy example", "eclair", StringUtils.stripAccents("\u00E9clair") );
|
||||
assertEquals("ALOSZZCN aloszzcn", StringUtils.stripAccents("\u0104\u0141\u00D3\u015A\u017B\u0179\u0106\u0143 "
|
||||
+ "\u0105\u0142\u00F3\u015B\u017C\u017A\u0107\u0144"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue