diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java index 1276c6cc9ae..13d1c952978 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java @@ -190,7 +190,7 @@ public final class ICUNormalizer2CharFilter extends BaseCharFilter { addOffCorrectMap(charCount + i, cumuDiff - i); } } else { - addOffCorrectMap(charCount + Math.min(1, outputLength), cumuDiff + diff); + addOffCorrectMap(charCount + outputLength, cumuDiff + diff); } charCount += outputLength; } diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java index 51ece34b982..6cf715e9efd 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java @@ -222,4 +222,194 @@ public class TestICUNormalizer2CharFilter extends BaseTokenStreamTestCase { checkAnalysisConsistency(random(), a, false, text); } } + + public void testCuriousMassiveString() throws Exception { + String text = "yqt \u0728\u0707\u0712\u0720\u0734 \u204c\u201d hyipy \u2667\u2619" + + "\u26ec\u267b\u26da uboyjwfbv \u2012\u205d\u2042\u200a\u2047\u2040 gyxmmz yvv %" + + "\ufb86 \n