LUCENE-4310: revert ... new test is failing

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1374379 13f79535-47bb-0310-9956-ffa450edef68
2012-08-17 17:59:38 +00:00 · 2012-08-17 17:59:38 +00:00 · 852291e84e
parent b9be13c0ea
commit 852291e84e
4 changed files with 2 additions and 38 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -79,10 +79,6 @@ Bug Fixes
  Also, ensure immutability and use only one instance of this table in RAM (lazy
  loaded) since its quite large. (sausarkar, Steven Rowe, Robert Muir)

-* LUCENE-4310: MappingCharFilter was failing to match input strings
-  containing non-BMP Unicode characters.  (Dawid Weiss, Robert Muir,
-  Mike McCandless)
-
 Build

 * LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for 
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java
@ -111,8 +111,9 @@ public class NormalizeCharMap {
        final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
        final IntsRef scratch = new IntsRef();
        for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
-          builder.add(Util.toUTF16(ent.getKey(), scratch),
+          builder.add(Util.toUTF32(ent.getKey(), scratch),
                      new CharsRef(ent.getValue()));
+      
        }
        map = builder.finish();
        pendingPairs.clear();
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
@ -33,7 +33,6 @@ import org.apache.lucene.analysis.CharFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util._TestUtil;

 public class TestMappingCharFilter extends BaseTokenStreamTestCase {
@ -56,11 +55,6 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {

    builder.add( "empty", "" );

-    // BMP (surrogate pair):
-    builder.add(UnicodeUtil.newString(new int[] {0x1D122}, 0, 1), "fclef");
-
-    builder.add("\uff01", "full width exclamation");
-
    normMap = builder.build();
  }

@ -134,18 +128,6 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
    assertTokenStreamContents(ts, new String[0], new int[]{}, new int[]{}, 5);
  }

-  public void testNonBMPChar() throws Exception {
-    CharFilter cs = new MappingCharFilter( normMap, new StringReader( UnicodeUtil.newString(new int[] {0x1D122}, 0, 1) ) );
-    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
-    assertTokenStreamContents(ts, new String[]{"fclef"}, new int[]{0}, new int[]{2}, 2);
-  }
-
-  public void testFullWidthChar() throws Exception {
-    CharFilter cs = new MappingCharFilter( normMap, new StringReader( "\uff01") );
-    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
-    assertTokenStreamContents(ts, new String[]{"full width exclamation"}, new int[]{0}, new int[]{1}, 1);
-  }
-
  //
  //                1111111111222
  //      01234567890123456789012
--- a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
@ -767,21 +767,6 @@ public final class Util {
    }
  }

-  /** Just maps each UTF16 unit (char) to the ints in an
-   *  IntsRef. */
-  public static IntsRef toUTF16(CharSequence s, IntsRef scratch) {
-    final int charLimit = s.length();
-    scratch.grow(charLimit);
-    int idx = 0;
-    while(idx < charLimit) {
-      scratch.ints[idx] = (int) s.charAt(idx);
-      idx++;
-    }
-    scratch.offset = 0;
-    scratch.length = idx;
-    return scratch;
-  }
-
  /** Decodes the Unicode codepoints from the provided
   *  CharSequence and places them in the provided scratch
   *  IntsRef, which must not be null, returning it. */