From 12e597a78cbf9d82414b944a5b285d81a2e27e51 Mon Sep 17 00:00:00 2001 From: MarkDacek Date: Wed, 8 Mar 2017 22:58:51 -0500 Subject: [PATCH] LANG-1300: fixed CharSequenceUtils indexOf for Supplementary chars --- .../commons/lang3/CharSequenceUtils.java | 37 ++++++++++--------- .../lang3/StringUtilsEqualsIndexOfTest.java | 2 + 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java index 1d97e5568..cc3920232 100644 --- a/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java +++ b/src/main/java/org/apache/commons/lang3/CharSequenceUtils.java @@ -84,17 +84,19 @@ static int indexOf(final CharSequence cs, final int searchChar, int start) { } //supplementary characters (LANG1300) if (searchChar <= Character.MAX_CODE_POINT) { + int ind = 0; char[] chars = Character.toChars(searchChar); - for (int i = start; i < sz; i++) { - if (cs.charAt(i) == chars[0]) { - if (i + 1 == sz) { - break; - } - if (cs.charAt(i + 1) == chars[1]) { - return i; - } - } - } + for (int i = start; i < sz - 1; i++) { + char high = cs.charAt(i); + char low = cs.charAt(i + 1); + if (high == chars[0] && low == chars[1]) { + return ind; + } else if (Character.isSurrogatePair(high, low)) { + //skip over 1 + i++; + } + ind++; + } } return NOT_FOUND; } @@ -148,17 +150,18 @@ static int lastIndexOf(final CharSequence cs, final int searchChar, int start) { } } //supplementary characters (LANG1300) + //NOTE - we must do a forward traversal for this to avoid duplicating code points if (searchChar <= Character.MAX_CODE_POINT) { char[] chars = Character.toChars(searchChar); for (int i = start; i >= 0; --i) { if (cs.charAt(i) == chars[0]) { - if (i + 1 == cs.length()) { - break; - } - if (cs.charAt(i + 1) == chars[1]) { - return i; - } - } + if (i + 1 == cs.length()) { + break; + } + if (cs.charAt(i + 1) == chars[1]) { + return i; + } + } } } return NOT_FOUND; diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java index 1c99fa414..8326061d2 100644 --- a/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java +++ b/src/test/java/org/apache/commons/lang3/StringUtilsEqualsIndexOfTest.java @@ -300,6 +300,8 @@ public void testIndexOf_charInt() { StringBuilder builder = new StringBuilder(); builder.appendCodePoint(CODE_POINT); assertEquals(0, StringUtils.indexOf(builder, CODE_POINT, 0)); + builder.appendCodePoint(CODE_POINT); + assertEquals(1, StringUtils.indexOf(builder, CODE_POINT, 1)); //inner branch on the supplementary character block char[] tmp = {(char) 55361}; builder = new StringBuilder();