From 852291e84e6d041bc768032cee171cf641d79012 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Fri, 17 Aug 2012 17:59:38 +0000 Subject: [PATCH] LUCENE-4310: revert ... new test is failing git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1374379 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 ---- .../analysis/charfilter/NormalizeCharMap.java | 3 ++- .../charfilter/TestMappingCharFilter.java | 18 ------------------ .../java/org/apache/lucene/util/fst/Util.java | 15 --------------- 4 files changed, 2 insertions(+), 38 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 2d8d20e329e..a137ef2b67a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -79,10 +79,6 @@ Bug Fixes Also, ensure immutability and use only one instance of this table in RAM (lazy loaded) since its quite large. (sausarkar, Steven Rowe, Robert Muir) -* LUCENE-4310: MappingCharFilter was failing to match input strings - containing non-BMP Unicode characters. (Dawid Weiss, Robert Muir, - Mike McCandless) - Build * LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java index c22203a76a4..c6470611d2c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java @@ -111,8 +111,9 @@ public class NormalizeCharMap { final org.apache.lucene.util.fst.Builder builder = new org.apache.lucene.util.fst.Builder(FST.INPUT_TYPE.BYTE2, outputs); final IntsRef scratch = new IntsRef(); for(Map.Entry ent : pendingPairs.entrySet()) { - builder.add(Util.toUTF16(ent.getKey(), scratch), + builder.add(Util.toUTF32(ent.getKey(), scratch), new CharsRef(ent.getValue())); + } map = builder.finish(); pendingPairs.clear(); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java index d692a57c328..c4fab5519c0 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java @@ -33,7 +33,6 @@ import org.apache.lucene.analysis.CharFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util._TestUtil; public class TestMappingCharFilter extends BaseTokenStreamTestCase { @@ -56,11 +55,6 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase { builder.add( "empty", "" ); - // BMP (surrogate pair): - builder.add(UnicodeUtil.newString(new int[] {0x1D122}, 0, 1), "fclef"); - - builder.add("\uff01", "full width exclamation"); - normMap = builder.build(); } @@ -134,18 +128,6 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase { assertTokenStreamContents(ts, new String[0], new int[]{}, new int[]{}, 5); } - public void testNonBMPChar() throws Exception { - CharFilter cs = new MappingCharFilter( normMap, new StringReader( UnicodeUtil.newString(new int[] {0x1D122}, 0, 1) ) ); - TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); - assertTokenStreamContents(ts, new String[]{"fclef"}, new int[]{0}, new int[]{2}, 2); - } - - public void testFullWidthChar() throws Exception { - CharFilter cs = new MappingCharFilter( normMap, new StringReader( "\uff01") ); - TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false); - assertTokenStreamContents(ts, new String[]{"full width exclamation"}, new int[]{0}, new int[]{1}, 1); - } - // // 1111111111222 // 01234567890123456789012 diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java index 7a9685745ed..55823a3f829 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java @@ -767,21 +767,6 @@ public final class Util { } } - /** Just maps each UTF16 unit (char) to the ints in an - * IntsRef. */ - public static IntsRef toUTF16(CharSequence s, IntsRef scratch) { - final int charLimit = s.length(); - scratch.grow(charLimit); - int idx = 0; - while(idx < charLimit) { - scratch.ints[idx] = (int) s.charAt(idx); - idx++; - } - scratch.offset = 0; - scratch.length = idx; - return scratch; - } - /** Decodes the Unicode codepoints from the provided * CharSequence and places them in the provided scratch * IntsRef, which must not be null, returning it. */