https://issues.apache.org/jira/browse/LANG-607 StringUtils.containsAny methods incorrectly matches Unicode 2.0+ supplementary characters.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@922703 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Gary D. Gregory 2010-03-14 00:54:03 +00:00
parent 8a8859cb88
commit 1ced894df7
2 changed files with 79 additions and 14 deletions

View File

@ -1437,20 +1437,32 @@ public static int indexOfAny(CharSequence cs, String searchChars) {
* <code>false</code> if no match or null input
* @since 2.4
*/
public static boolean containsAny(CharSequence cs, char[] searchChars) {
if (cs == null || cs.length() == 0 || searchChars == null || searchChars.length == 0) {
return false;
}
for (int i = 0; i < cs.length(); i++) {
char ch = cs.charAt(i);
for (int j = 0; j < searchChars.length; j++) {
if (searchChars[j] == ch) {
return true;
}
}
}
return false;
}
public static boolean containsAny(CharSequence cs, char[] searchChars) {
if (isEmpty(cs) || ArrayUtils.isEmpty(searchChars)) {
return false;
}
int csLength = cs.length();
int searchLength = searchChars.length;
int csLastIndex = csLength - 1;
int searchLastIndex = searchLength - 1;
for (int i = 0; i < csLength; i++) {
char ch = cs.charAt(i);
for (int j = 0; j < searchLength; j++) {
if (searchChars[j] == ch) {
if (i < csLastIndex && j < searchLastIndex && ch >= Character.MIN_HIGH_SURROGATE && ch <= Character.MAX_HIGH_SURROGATE) {
// ch is a supplementary character
if (searchChars[j + 1] == cs.charAt(i + 1)) {
return true;
}
} else {
// ch is in the Basic Multilingual Plane
return true;
}
}
}
}
return false;
}
/**
* <p>

View File

@ -33,7 +33,19 @@ public class StringUtilsEqualsIndexOfTest extends TestCase {
private static final String BAR = "bar";
private static final String FOOBAR = "foobar";
private static final String[] FOOBAR_SUB_ARRAY = new String[] {"ob", "ba"};
/**
* Supplementary character U+20000
* See http://java.sun.com/developer/technicalArticles/Intl/Supplementary/
*/
private static final String CharU20000 = "\uD840\uDC00";
/**
* Supplementary character U+20001
* See http://java.sun.com/developer/technicalArticles/Intl/Supplementary/
*/
private static final String CharU20001 = "\uD840\uDC01";
public StringUtilsEqualsIndexOfTest(String name) {
super(name);
}
@ -643,4 +655,45 @@ public void testContainsNone_Chararray() {
assertEquals(true, StringUtils.containsNone(str3, chars3));
}
/**
* See http://java.sun.com/developer/technicalArticles/Intl/Supplementary/
*/
public void testContainsStringWithSupplementaryChars() {
assertEquals(true, StringUtils.contains(CharU20000 + CharU20001, CharU20000));
assertEquals(true, StringUtils.contains(CharU20000 + CharU20001, CharU20001));
assertEquals(true, StringUtils.contains(CharU20000, CharU20000));
assertEquals(false, StringUtils.contains(CharU20000, CharU20001));
}
/**
* See http://java.sun.com/developer/technicalArticles/Intl/Supplementary/
*/
public void testContainsAnyStringWithSupplementaryChars() {
assertEquals(true, StringUtils.containsAny(CharU20000 + CharU20001, CharU20000));
assertEquals(true, StringUtils.containsAny(CharU20000 + CharU20001, CharU20001));
assertEquals(true, StringUtils.containsAny(CharU20000, CharU20000));
// Sanity check:
assertEquals(-1, CharU20000.indexOf(CharU20001));
assertEquals(0, CharU20000.indexOf(CharU20001.charAt(0)));
assertEquals(-1, CharU20000.indexOf(CharU20001.charAt(1)));
// Test:
assertEquals(false, StringUtils.containsAny(CharU20000, CharU20001));
assertEquals(false, StringUtils.containsAny(CharU20001, CharU20000));
}
/**
* See http://java.sun.com/developer/technicalArticles/Intl/Supplementary/
*/
public void testContainsAnyCharArrayWithSupplementaryChars() {
assertEquals(true, StringUtils.containsAny(CharU20000 + CharU20001, CharU20000.toCharArray()));
assertEquals(true, StringUtils.containsAny(CharU20000 + CharU20001, CharU20001.toCharArray()));
assertEquals(true, StringUtils.containsAny(CharU20000, CharU20000.toCharArray()));
// Sanity check:
assertEquals(-1, CharU20000.indexOf(CharU20001));
assertEquals(0, CharU20000.indexOf(CharU20001.charAt(0)));
assertEquals(-1, CharU20000.indexOf(CharU20001.charAt(1)));
// Test:
assertEquals(false, StringUtils.containsAny(CharU20000, CharU20001.toCharArray()));
assertEquals(false, StringUtils.containsAny(CharU20001, CharU20000.toCharArray()));
}
}