From dafb8e85dcb76a299c6add11ecb62acf702d6c2a Mon Sep 17 00:00:00 2001 From: Henri Yandell Date: Mon, 14 Dec 2009 07:31:42 +0000 Subject: [PATCH] Adding stripAccents method to StringUtils. It requires JDK 1.6 (and accesses it via reflection until Lang becomes 1.6 dependent). LANG-285 git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@890205 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/commons/lang3/StringUtils.java | 64 +++++++++++++++++++ .../lang3/StringUtilsTrimEmptyTest.java | 28 ++++++++ 2 files changed, 92 insertions(+) diff --git a/src/java/org/apache/commons/lang3/StringUtils.java b/src/java/org/apache/commons/lang3/StringUtils.java index 1b7214368..14563aa66 100644 --- a/src/java/org/apache/commons/lang3/StringUtils.java +++ b/src/java/org/apache/commons/lang3/StringUtils.java @@ -607,6 +607,70 @@ public class StringUtils { return newArr; } + /** + *

Removes the accents from a string.

+ *

NOTE: This method requires JDK 1.6; on JDK 1.5 it throws an UnsupportedOperationException.

+ * + *
+     * StringUtils.stripAccents(null)                = null
+     * StringUtils.stripAccents("")                  = ""
+     * StringUtils.stripAccents("control")           = "control"
+     * StringUtils.stripAccents("&eacute;clair")     = "eclair"
+     * 
+ * + * @param input String to be stripped + * @return String without accents on the text + * + * @since 3.0 + */ + public static String stripAccents(String input) { + if(input == null) { + return null; + } + if(SystemUtils.isJavaVersionAtLeast(1.6f)) { + + // String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD); + + // START of 1.5 reflection - in 1.6 use the line commented out above + try { + // get java.text.Normalizer.Form class + Class normalizerFormClass = ClassUtils.getClass("java.text.Normalizer$Form", false); + + // get Normlizer class + Class normalizerClass = ClassUtils.getClass("java.text.Normalizer", false); + + // get static method on Normalizer + java.lang.reflect.Method method = normalizerClass.getMethod("normalize", CharSequence.class, normalizerFormClass ); + + // get Normalizer.NFD field + java.lang.reflect.Field nfd = normalizerFormClass.getField("NFD"); + + // invoke method + String decomposed = (String) method.invoke( null, input, nfd.get(null) ); + // END of 1.5 reflection + + java.util.regex.Pattern accentPattern = java.util.regex.Pattern.compile("\\p{InCombiningDiacriticalMarks}+"); + return accentPattern.matcher(decomposed).replaceAll(""); + } catch(ClassNotFoundException cnfe) { + throw new RuntimeException("ClassNotFoundException occurred during 1.6 backcompat code", cnfe); + } catch(NoSuchMethodException nsme) { + throw new RuntimeException("NoSuchMethodException occurred during 1.6 backcompat code", nsme); + } catch(NoSuchFieldException nsfe) { + throw new RuntimeException("NoSuchFieldException occurred during 1.6 backcompat code", nsfe); + } catch(IllegalAccessException iae) { + throw new RuntimeException("IllegalAccessException occurred during 1.6 backcompat code", iae); + } catch(IllegalArgumentException iae) { + throw new RuntimeException("IllegalArgumentException occurred during 1.6 backcompat code", iae); + } catch(java.lang.reflect.InvocationTargetException ite) { + throw new 
RuntimeException("InvocationTargetException occurred during 1.6 backcompat code", ite); + } catch(SecurityException se) { + throw new RuntimeException("SecurityException occurred during 1.6 backcompat code", se); + } + } else { + throw new UnsupportedOperationException("The stripAccents(String) method is not supported until Java 1.6"); + } + } + // Equals //----------------------------------------------------------------------- /** diff --git a/src/test/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java b/src/test/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java index 56849041a..273f98027 100644 --- a/src/test/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java +++ b/src/test/org/apache/commons/lang3/StringUtilsTrimEmptyTest.java @@ -264,6 +264,34 @@ public class StringUtilsTrimEmptyTest extends TestCase { assertArrayEquals(foo, StringUtils.stripAll(fooDots, ".")); } + public void testStripAccents() { + if(SystemUtils.isJavaVersionAtLeast(1.6f)) { + String cue = "\u00C7\u00FA\u00EA"; + assertEquals( "Failed to strip accents from " + cue, "Cue", StringUtils.stripAccents(cue)); + + String lots = "\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5\u00C7\u00C8\u00C9" + + "\u00CA\u00CB\u00CC\u00CD\u00CE\u00CF\u00D1\u00D2\u00D3" + + "\u00D4\u00D5\u00D6\u00D9\u00DA\u00DB\u00DC\u00DD"; + assertEquals( "Failed to strip accents from " + lots, + "AAAAAACEEEEIIIINOOOOOUUUUY", + StringUtils.stripAccents(lots)); + + assertNull( "Failed null safety", StringUtils.stripAccents(null) ); + assertEquals( "Failed empty String", "", StringUtils.stripAccents("") ); + assertEquals( "Failed to handle non-accented text", "control", StringUtils.stripAccents("control") ); + assertEquals( "Failed to handle easy example", "eclair", StringUtils.stripAccents("\u00E9clair") ); + } else { + try { + StringUtils.stripAccents("string"); + fail("Before JDK 1.6, stripAccents is not expected to work"); + } catch(UnsupportedOperationException uoe) { + assertEquals("The stripAccents(String) method is not 
supported until Java 1.6", uoe.getMessage()); + } + } + } + + //----------------------------------------------------------------------- + private void assertArrayEquals(Object[] o1, Object[] o2) { if(o1 == null) { assertEquals(o1,o2);