LUCENE-1987: Remove lots of deprecations from analysis package. Undeprecate some Token ctors.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@826404 13f79535-47bb-0310-9956-ffa450edef68
2009-10-18 10:37:56 +00:00 · 2009-10-18 10:37:56 +00:00 · 376873eba5
parent 173d95a166
commit 376873eba5
7 changed files with 34 additions and 199 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -75,6 +75,12 @@ API Changes
 * LUCENE-1979: Remove remaining deprecations from indexer package.
  (Uwe Schindler, Michael Busch)

+* LUCENE-1989: Generify CharArraySet. (Uwe Schindler)
+
+* LUCENE-1987: Rremove deprecations from analysis package and Token.
+  Un-deprecate some ctors of Token, as they are still useful.
+  (Uwe Schindler)
+
 Bug fixes

 * LUCENE-1951: When the text provided to WildcardQuery has no wildcard
--- a/common-build.xml
+++ b/common-build.xml
@ -42,7 +42,7 @@
  <property name="Name" value="Lucene"/>
  <property name="dev.version" value="3.0-dev"/>
  <property name="version" value="${dev.version}"/>
-  <property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091014"/>
+  <property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091018"/>
  <property name="spec.version" value="${version}"/>	
  <property name="year" value="2000-${current.year}"/>
  <property name="final.name" value="lucene-${name}-${version}"/>
--- a/src/java/org/apache/lucene/analysis/CharacterCache.java
+++ b/src/java/org/apache/lucene/analysis/CharacterCache.java
@ -1,47 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis;
-
-/**
- * Replacement for Java 1.5 Character.valueOf()
- * @deprecated Move to Character.valueOf() in 3.0
- */
-class CharacterCache {
-
-  private static final Character cache[] = new Character[128];
-
-  static {
-    for (int i = 0; i < cache.length; i++) {
-      cache[i] = new Character((char) i);
-    }
-  }
-
-  /**
-   * Returns a Character instance representing the given char value
-   * 
-   * @param c
-   *          a char value
-   * @return a Character representation of the given char value.
-   */
-  public static Character valueOf(char c) {
-    if (c < cache.length) {
-      return cache[(int) c];
-    }
-    return new Character(c);
-  }
-}
--- a/src/java/org/apache/lucene/analysis/MappingCharFilter.java
+++ b/src/java/org/apache/lucene/analysis/MappingCharFilter.java
@ -30,8 +30,7 @@ import java.util.LinkedList;
 public class MappingCharFilter extends BaseCharFilter {

  private final NormalizeCharMap normMap;
-  //private LinkedList<Character> buffer;
-  private LinkedList buffer;
+  private LinkedList<Character> buffer;
  private String replacement;
  private int charPointer;
  private int nextCharCounter;
@ -57,7 +56,7 @@ public class MappingCharFilter extends BaseCharFilter {
      int firstChar = nextChar();
      if (firstChar == -1) return -1;
      NormalizeCharMap nm = normMap.submap != null ?
-        (NormalizeCharMap)normMap.submap.get(CharacterCache.valueOf((char) firstChar)) : null;
+        normMap.submap.get(Character.valueOf((char) firstChar)) : null;
      if (nm == null) return firstChar;
      NormalizeCharMap result = match(nm);
      if (result == null) return firstChar;
@ -78,7 +77,7 @@ public class MappingCharFilter extends BaseCharFilter {
  private int nextChar() throws IOException {
    nextCharCounter++;
    if (buffer != null && !buffer.isEmpty()) {
-      return ((Character)buffer.removeFirst()).charValue();
+      return buffer.removeFirst().charValue();
    }
    return input.read();
  }
@ -86,15 +85,15 @@ public class MappingCharFilter extends BaseCharFilter {
  private void pushChar(int c) {
    nextCharCounter--;
    if(buffer == null)
-      buffer = new LinkedList();
-    buffer.addFirst(new Character((char) c));
+      buffer = new LinkedList<Character>();
+    buffer.addFirst(Character.valueOf((char) c));
  }

  private void pushLastChar(int c) {
    if (buffer == null) {
-      buffer = new LinkedList();
+      buffer = new LinkedList<Character>();
    }
-    buffer.addLast(new Character((char) c));
+    buffer.addLast(Character.valueOf((char) c));
  }

  private NormalizeCharMap match(NormalizeCharMap map) throws IOException {
@ -102,7 +101,7 @@ public class MappingCharFilter extends BaseCharFilter {
    if (map.submap != null) {
      int chr = nextChar();
      if (chr != -1) {
-        NormalizeCharMap subMap = (NormalizeCharMap) map.submap.get(CharacterCache.valueOf((char) chr));
+        NormalizeCharMap subMap = map.submap.get(Character.valueOf((char) chr));
        if (subMap != null) {
          result = match(subMap);
        }
--- a/src/java/org/apache/lucene/analysis/NormalizeCharMap.java
+++ b/src/java/org/apache/lucene/analysis/NormalizeCharMap.java
@ -45,10 +45,10 @@ public class NormalizeCharMap {
      if (currMap.submap == null) {
        currMap.submap = new HashMap<Character, NormalizeCharMap>(1);
      }
-      NormalizeCharMap map = currMap.submap.get(CharacterCache.valueOf(c));
+      NormalizeCharMap map = currMap.submap.get(Character.valueOf(c));
      if (map == null) {
        map = new NormalizeCharMap();
-        currMap.submap.put(new Character(c), map);
+        currMap.submap.put(Character.valueOf(c), map);
      }
      currMap = map;
    }
--- a/src/java/org/apache/lucene/analysis/Token.java
+++ b/src/java/org/apache/lucene/analysis/Token.java
@ -58,18 +58,6 @@ import org.apache.lucene.util.AttributeImpl;
  to easily switch from the old to the new TokenStream API.
  
  <br><br>
-
-  <p><b>NOTE:</b> As of 2.3, Token stores the term text
-  internally as a malleable char[] termBuffer instead of
-  String termText.  The indexing code and core tokenizers
-  have been changed to re-use a single Token instance, changing
-  its buffer and other fields in-place as the Token is
-  processed.  This provides substantially better indexing
-  performance as it saves the GC cost of new'ing a Token and
-  String for every term.  The APIs that accept String
-  termText are still available but a warning about the
-  associated performance cost has been added (below).  The
-  {@link #termText()} method has been deprecated.</p>
  
  <p>Tokenizers and filters should try to re-use a Token
  instance when possible for best performance, by
@ -135,61 +123,13 @@ public class Token extends AttributeImpl

  private static int MIN_BUFFER_SIZE = 10;

-  /** @deprecated We will remove this when we remove the
-   * deprecated APIs */
-  private String termText;
-
-  /**
-   * Characters for the term text.
-   * @deprecated This will be made private. Instead, use:
-   * {@link #termBuffer()}, 
-   * {@link #setTermBuffer(char[], int, int)},
-   * {@link #setTermBuffer(String)}, or
-   * {@link #setTermBuffer(String, int, int)}
-   */
-  char[] termBuffer;
-
-  /**
-   * Length of term text in the buffer.
-   * @deprecated This will be made private. Instead, use:
-   * {@link #termLength()}, or @{link setTermLength(int)}.
-   */
-  int termLength;
-
-  /**
-   * Start in source text.
-   * @deprecated This will be made private. Instead, use:
-   * {@link #startOffset()}, or @{link setStartOffset(int)}.
-   */
-  int startOffset;
-
-  /**
-   * End in source text.
-   * @deprecated This will be made private. Instead, use:
-   * {@link #endOffset()}, or @{link setEndOffset(int)}.
-   */
-  int endOffset;
-
-  /**
-   * The lexical type of the token.
-   * @deprecated This will be made private. Instead, use:
-   * {@link #type()}, or @{link setType(String)}.
-   */
-  String type = DEFAULT_TYPE;
-
+  private char[] termBuffer;
+  private int termLength;
+  private int startOffset,endOffset;
+  private String type = DEFAULT_TYPE;
  private int flags;
-  
-  /**
-   * @deprecated This will be made private. Instead, use:
-   * {@link #getPayload()}, or @{link setPayload(Payload)}.
-   */
-  Payload payload;
-  
-  /**
-   * @deprecated This will be made private. Instead, use:
-   * {@link #getPositionIncrement()}, or @{link setPositionIncrement(String)}.
-   */
-  int positionIncrement = 1;
+  private Payload payload;
+  private int positionIncrement = 1;

  /** Constructs a Token will null text. */
  public Token() {
@ -236,10 +176,9 @@ public class Token extends AttributeImpl
   *  @param text term text
   *  @param start start offset
   *  @param end end offset
-   *  @deprecated Use {@link #Token(char[], int, int, int, int)} instead.
   */
  public Token(String text, int start, int end) {
-    termText = text;
+    setTermBuffer(text);
    startOffset = start;
    endOffset = end;
  }
@ -252,10 +191,9 @@ public class Token extends AttributeImpl
   *  @param start start offset
   *  @param end end offset
   *  @param typ token type
-   *  @deprecated Use {@link #Token(char[], int, int, int, int)} and {@link #setType(String)} instead.
   */
  public Token(String text, int start, int end, String typ) {
-    termText = text;
+    setTermBuffer(text);
    startOffset = start;
    endOffset = end;
    type = typ;
@ -270,10 +208,9 @@ public class Token extends AttributeImpl
   * @param start
   * @param end
   * @param flags token type bits
-   * @deprecated Use {@link #Token(char[], int, int, int, int)} and {@link #setFlags(int)} instead.
   */
  public Token(String text, int start, int end, int flags) {
-    termText = text;
+    setTermBuffer(text);
    startOffset = start;
    endOffset = end;
    this.flags = flags;
@ -335,32 +272,6 @@ public class Token extends AttributeImpl
    return positionIncrement;
  }

-  /** Sets the Token's term text.  <b>NOTE:</b> for better
-   *  indexing speed you should instead use the char[]
-   *  termBuffer methods to set the term text.
-   *  @deprecated use {@link #setTermBuffer(char[], int, int)} or
-   *                  {@link #setTermBuffer(String)} or
-   *                  {@link #setTermBuffer(String, int, int)}.
-   */
-  public void setTermText(String text) {
-    termText = text;
-    termBuffer = null;
-  }
-
-  /** Returns the Token's term text.
-   * 
-   * @deprecated This method now has a performance penalty
-   * because the text is stored internally in a char[].  If
-   * possible, use {@link #termBuffer()} and {@link
-   * #termLength()} directly instead.  If you really need a
-   * String, use {@link #term()}</b>
-   */
-  public final String termText() {
-    if (termText == null && termBuffer != null)
-      termText = new String(termBuffer, 0, termLength);
-    return termText;
-  }
-
  /** Returns the Token's term text.
   * 
   * This method has a performance penalty
@ -371,8 +282,6 @@ public class Token extends AttributeImpl
   * a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
   */
  public final String term() {
-    if (termText != null)
-      return termText;
    initTermBuffer();
    return new String(termBuffer, 0, termLength);
  }
@ -384,7 +293,6 @@ public class Token extends AttributeImpl
   *  @param length the number of characters to copy
   */
  public final void setTermBuffer(char[] buffer, int offset, int length) {
-    termText = null;
    growTermBuffer(length);
    System.arraycopy(buffer, offset, termBuffer, 0, length);
    termLength = length;
@ -394,7 +302,6 @@ public class Token extends AttributeImpl
   *  @param buffer the buffer to copy
   */
  public final void setTermBuffer(String buffer) {
-    termText = null;
    final int length = buffer.length();
    growTermBuffer(length);
    buffer.getChars(0, length, termBuffer, 0);
@ -410,7 +317,6 @@ public class Token extends AttributeImpl
  public final void setTermBuffer(String buffer, int offset, int length) {
    assert offset <= buffer.length();
    assert offset + length <= buffer.length();
-    termText = null;
    growTermBuffer(length);
    buffer.getChars(offset, offset + length, termBuffer, 0);
    termLength = length;
@ -441,17 +347,7 @@ public class Token extends AttributeImpl
  public char[] resizeTermBuffer(int newSize) {
    if (termBuffer == null) {
      // The buffer is always at least MIN_BUFFER_SIZE
-      newSize = newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize;
-      //Preserve termText 
-      if (termText != null) {
-        final int ttLen = termText.length();
-        newSize = newSize < ttLen ? ttLen : newSize;
-        termBuffer = new char[ArrayUtil.getNextSize(newSize)];
-        termText.getChars(0, termText.length(), termBuffer, 0);
-        termText = null;
-      } else { // no term Text, the first allocation
-        termBuffer = new char[ArrayUtil.getNextSize(newSize)];
-      }    
+      termBuffer = new char[ArrayUtil.getNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)]; 
    } else {
      if(termBuffer.length < newSize){
        // Not big enough; create a new array with slight
@ -481,25 +377,10 @@ public class Token extends AttributeImpl
    } 
  }
  
-
-  // TODO: once we remove the deprecated termText() method
-  // and switch entirely to char[] termBuffer we don't need
-  // to use this method anymore, only for late init of the buffer
  private void initTermBuffer() {
    if (termBuffer == null) {
-      if (termText == null) {
-        termBuffer = new char[ArrayUtil.getNextSize(MIN_BUFFER_SIZE)];
-        termLength = 0;
-      } else {
-        int length = termText.length();
-        if (length < MIN_BUFFER_SIZE) length = MIN_BUFFER_SIZE;
-        termBuffer = new char[ArrayUtil.getNextSize(length)];
-        termLength = termText.length();
-        termText.getChars(0, termText.length(), termBuffer, 0);
-        termText = null;
-      }
-    } else {
-      termText = null;
+      termBuffer = new char[ArrayUtil.getNextSize(MIN_BUFFER_SIZE)];
+      termLength = 0;
    }
  }

@ -528,7 +409,7 @@ public class Token extends AttributeImpl
    corresponding to this token in the source text.

    Note that the difference between endOffset() and startOffset() may not be
-    equal to termText.length(), as the term text may have been altered by a
+    equal to {@link #termLength}, as the term text may have been altered by a
    stemmer or some other filter. */
  public final int startOffset() {
    return startOffset;
@ -630,7 +511,6 @@ public class Token extends AttributeImpl
    payload = null;
    // Leave termBuffer to allow re-use
    termLength = 0;
-    termText = null;
    positionIncrement = 1;
    flags = 0;
    startOffset = endOffset = 0;
--- a/src/test/org/apache/lucene/analysis/TestToken.java
+++ b/src/test/org/apache/lucene/analysis/TestToken.java
@ -150,7 +150,7 @@ public class TestToken extends LuceneTestCase {
    t.setTermBuffer(b, 0, 5);
    assertEquals("(aloha,0,5)", t.toString());

-    t.setTermText("hi there");
+    t.setTermBuffer("hi there");
    assertEquals("(hi there,0,5)", t.toString());
  }

@ -171,20 +171,17 @@ public class TestToken extends LuceneTestCase {
  
  public void testMixedStringArray() throws Exception {
    Token t = new Token("hello", 0, 5);
-    assertEquals(t.termText(), "hello");
    assertEquals(t.termLength(), 5);
    assertEquals(t.term(), "hello");
-    t.setTermText("hello2");
+    t.setTermBuffer("hello2");
    assertEquals(t.termLength(), 6);
    assertEquals(t.term(), "hello2");
    t.setTermBuffer("hello3".toCharArray(), 0, 6);
-    assertEquals(t.termText(), "hello3");
+    assertEquals(t.term(), "hello3");

-    // Make sure if we get the buffer and change a character
-    // that termText() reflects the change
    char[] buffer = t.termBuffer();
    buffer[1] = 'o';
-    assertEquals(t.termText(), "hollo3");
+    assertEquals(t.term(), "hollo3");
  }
  
  public void testClone() throws Exception {