LUCENE-1987: Remove lots of deprecations from analysis package. Undeprecate some Token ctors.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@826404 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2009-10-18 10:37:56 +00:00
parent 173d95a166
commit 376873eba5
7 changed files with 34 additions and 199 deletions

View File

@ -75,6 +75,12 @@ API Changes
* LUCENE-1979: Remove remaining deprecations from indexer package.
(Uwe Schindler, Michael Busch)
* LUCENE-1989: Generify CharArraySet. (Uwe Schindler)
* LUCENE-1987: Rremove deprecations from analysis package and Token.
Un-deprecate some ctors of Token, as they are still useful.
(Uwe Schindler)
Bug fixes
* LUCENE-1951: When the text provided to WildcardQuery has no wildcard

View File

@ -42,7 +42,7 @@
<property name="Name" value="Lucene"/>
<property name="dev.version" value="3.0-dev"/>
<property name="version" value="${dev.version}"/>
<property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091014"/>
<property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091018"/>
<property name="spec.version" value="${version}"/>
<property name="year" value="2000-${current.year}"/>
<property name="final.name" value="lucene-${name}-${version}"/>

View File

@ -1,47 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis;
/**
* Replacement for Java 1.5 Character.valueOf()
* @deprecated Move to Character.valueOf() in 3.0
*/
class CharacterCache {
private static final Character cache[] = new Character[128];
static {
for (int i = 0; i < cache.length; i++) {
cache[i] = new Character((char) i);
}
}
/**
* Returns a Character instance representing the given char value
*
* @param c
* a char value
* @return a Character representation of the given char value.
*/
public static Character valueOf(char c) {
if (c < cache.length) {
return cache[(int) c];
}
return new Character(c);
}
}

View File

@ -30,8 +30,7 @@ import java.util.LinkedList;
public class MappingCharFilter extends BaseCharFilter {
private final NormalizeCharMap normMap;
//private LinkedList<Character> buffer;
private LinkedList buffer;
private LinkedList<Character> buffer;
private String replacement;
private int charPointer;
private int nextCharCounter;
@ -57,7 +56,7 @@ public class MappingCharFilter extends BaseCharFilter {
int firstChar = nextChar();
if (firstChar == -1) return -1;
NormalizeCharMap nm = normMap.submap != null ?
(NormalizeCharMap)normMap.submap.get(CharacterCache.valueOf((char) firstChar)) : null;
normMap.submap.get(Character.valueOf((char) firstChar)) : null;
if (nm == null) return firstChar;
NormalizeCharMap result = match(nm);
if (result == null) return firstChar;
@ -78,7 +77,7 @@ public class MappingCharFilter extends BaseCharFilter {
private int nextChar() throws IOException {
nextCharCounter++;
if (buffer != null && !buffer.isEmpty()) {
return ((Character)buffer.removeFirst()).charValue();
return buffer.removeFirst().charValue();
}
return input.read();
}
@ -86,15 +85,15 @@ public class MappingCharFilter extends BaseCharFilter {
private void pushChar(int c) {
nextCharCounter--;
if(buffer == null)
buffer = new LinkedList();
buffer.addFirst(new Character((char) c));
buffer = new LinkedList<Character>();
buffer.addFirst(Character.valueOf((char) c));
}
private void pushLastChar(int c) {
if (buffer == null) {
buffer = new LinkedList();
buffer = new LinkedList<Character>();
}
buffer.addLast(new Character((char) c));
buffer.addLast(Character.valueOf((char) c));
}
private NormalizeCharMap match(NormalizeCharMap map) throws IOException {
@ -102,7 +101,7 @@ public class MappingCharFilter extends BaseCharFilter {
if (map.submap != null) {
int chr = nextChar();
if (chr != -1) {
NormalizeCharMap subMap = (NormalizeCharMap) map.submap.get(CharacterCache.valueOf((char) chr));
NormalizeCharMap subMap = map.submap.get(Character.valueOf((char) chr));
if (subMap != null) {
result = match(subMap);
}

View File

@ -45,10 +45,10 @@ public class NormalizeCharMap {
if (currMap.submap == null) {
currMap.submap = new HashMap<Character, NormalizeCharMap>(1);
}
NormalizeCharMap map = currMap.submap.get(CharacterCache.valueOf(c));
NormalizeCharMap map = currMap.submap.get(Character.valueOf(c));
if (map == null) {
map = new NormalizeCharMap();
currMap.submap.put(new Character(c), map);
currMap.submap.put(Character.valueOf(c), map);
}
currMap = map;
}

View File

@ -58,18 +58,6 @@ import org.apache.lucene.util.AttributeImpl;
to easily switch from the old to the new TokenStream API.
<br><br>
<p><b>NOTE:</b> As of 2.3, Token stores the term text
internally as a malleable char[] termBuffer instead of
String termText. The indexing code and core tokenizers
have been changed to re-use a single Token instance, changing
its buffer and other fields in-place as the Token is
processed. This provides substantially better indexing
performance as it saves the GC cost of new'ing a Token and
String for every term. The APIs that accept String
termText are still available but a warning about the
associated performance cost has been added (below). The
{@link #termText()} method has been deprecated.</p>
<p>Tokenizers and filters should try to re-use a Token
instance when possible for best performance, by
@ -135,61 +123,13 @@ public class Token extends AttributeImpl
private static int MIN_BUFFER_SIZE = 10;
/** @deprecated We will remove this when we remove the
* deprecated APIs */
private String termText;
/**
* Characters for the term text.
* @deprecated This will be made private. Instead, use:
* {@link #termBuffer()},
* {@link #setTermBuffer(char[], int, int)},
* {@link #setTermBuffer(String)}, or
* {@link #setTermBuffer(String, int, int)}
*/
char[] termBuffer;
/**
* Length of term text in the buffer.
* @deprecated This will be made private. Instead, use:
* {@link #termLength()}, or @{link setTermLength(int)}.
*/
int termLength;
/**
* Start in source text.
* @deprecated This will be made private. Instead, use:
* {@link #startOffset()}, or @{link setStartOffset(int)}.
*/
int startOffset;
/**
* End in source text.
* @deprecated This will be made private. Instead, use:
* {@link #endOffset()}, or @{link setEndOffset(int)}.
*/
int endOffset;
/**
* The lexical type of the token.
* @deprecated This will be made private. Instead, use:
* {@link #type()}, or @{link setType(String)}.
*/
String type = DEFAULT_TYPE;
private char[] termBuffer;
private int termLength;
private int startOffset,endOffset;
private String type = DEFAULT_TYPE;
private int flags;
/**
* @deprecated This will be made private. Instead, use:
* {@link #getPayload()}, or @{link setPayload(Payload)}.
*/
Payload payload;
/**
* @deprecated This will be made private. Instead, use:
* {@link #getPositionIncrement()}, or @{link setPositionIncrement(String)}.
*/
int positionIncrement = 1;
private Payload payload;
private int positionIncrement = 1;
/** Constructs a Token will null text. */
public Token() {
@ -236,10 +176,9 @@ public class Token extends AttributeImpl
* @param text term text
* @param start start offset
* @param end end offset
* @deprecated Use {@link #Token(char[], int, int, int, int)} instead.
*/
public Token(String text, int start, int end) {
termText = text;
setTermBuffer(text);
startOffset = start;
endOffset = end;
}
@ -252,10 +191,9 @@ public class Token extends AttributeImpl
* @param start start offset
* @param end end offset
* @param typ token type
* @deprecated Use {@link #Token(char[], int, int, int, int)} and {@link #setType(String)} instead.
*/
public Token(String text, int start, int end, String typ) {
termText = text;
setTermBuffer(text);
startOffset = start;
endOffset = end;
type = typ;
@ -270,10 +208,9 @@ public class Token extends AttributeImpl
* @param start
* @param end
* @param flags token type bits
* @deprecated Use {@link #Token(char[], int, int, int, int)} and {@link #setFlags(int)} instead.
*/
public Token(String text, int start, int end, int flags) {
termText = text;
setTermBuffer(text);
startOffset = start;
endOffset = end;
this.flags = flags;
@ -335,32 +272,6 @@ public class Token extends AttributeImpl
return positionIncrement;
}
/** Sets the Token's term text. <b>NOTE:</b> for better
* indexing speed you should instead use the char[]
* termBuffer methods to set the term text.
* @deprecated use {@link #setTermBuffer(char[], int, int)} or
* {@link #setTermBuffer(String)} or
* {@link #setTermBuffer(String, int, int)}.
*/
public void setTermText(String text) {
termText = text;
termBuffer = null;
}
/** Returns the Token's term text.
*
* @deprecated This method now has a performance penalty
* because the text is stored internally in a char[]. If
* possible, use {@link #termBuffer()} and {@link
* #termLength()} directly instead. If you really need a
* String, use {@link #term()}</b>
*/
public final String termText() {
if (termText == null && termBuffer != null)
termText = new String(termBuffer, 0, termLength);
return termText;
}
/** Returns the Token's term text.
*
* This method has a performance penalty
@ -371,8 +282,6 @@ public class Token extends AttributeImpl
* a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
*/
public final String term() {
if (termText != null)
return termText;
initTermBuffer();
return new String(termBuffer, 0, termLength);
}
@ -384,7 +293,6 @@ public class Token extends AttributeImpl
* @param length the number of characters to copy
*/
public final void setTermBuffer(char[] buffer, int offset, int length) {
termText = null;
growTermBuffer(length);
System.arraycopy(buffer, offset, termBuffer, 0, length);
termLength = length;
@ -394,7 +302,6 @@ public class Token extends AttributeImpl
* @param buffer the buffer to copy
*/
public final void setTermBuffer(String buffer) {
termText = null;
final int length = buffer.length();
growTermBuffer(length);
buffer.getChars(0, length, termBuffer, 0);
@ -410,7 +317,6 @@ public class Token extends AttributeImpl
public final void setTermBuffer(String buffer, int offset, int length) {
assert offset <= buffer.length();
assert offset + length <= buffer.length();
termText = null;
growTermBuffer(length);
buffer.getChars(offset, offset + length, termBuffer, 0);
termLength = length;
@ -441,17 +347,7 @@ public class Token extends AttributeImpl
public char[] resizeTermBuffer(int newSize) {
if (termBuffer == null) {
// The buffer is always at least MIN_BUFFER_SIZE
newSize = newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize;
//Preserve termText
if (termText != null) {
final int ttLen = termText.length();
newSize = newSize < ttLen ? ttLen : newSize;
termBuffer = new char[ArrayUtil.getNextSize(newSize)];
termText.getChars(0, termText.length(), termBuffer, 0);
termText = null;
} else { // no term Text, the first allocation
termBuffer = new char[ArrayUtil.getNextSize(newSize)];
}
termBuffer = new char[ArrayUtil.getNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
} else {
if(termBuffer.length < newSize){
// Not big enough; create a new array with slight
@ -481,25 +377,10 @@ public class Token extends AttributeImpl
}
}
// TODO: once we remove the deprecated termText() method
// and switch entirely to char[] termBuffer we don't need
// to use this method anymore, only for late init of the buffer
private void initTermBuffer() {
if (termBuffer == null) {
if (termText == null) {
termBuffer = new char[ArrayUtil.getNextSize(MIN_BUFFER_SIZE)];
termLength = 0;
} else {
int length = termText.length();
if (length < MIN_BUFFER_SIZE) length = MIN_BUFFER_SIZE;
termBuffer = new char[ArrayUtil.getNextSize(length)];
termLength = termText.length();
termText.getChars(0, termText.length(), termBuffer, 0);
termText = null;
}
} else {
termText = null;
termBuffer = new char[ArrayUtil.getNextSize(MIN_BUFFER_SIZE)];
termLength = 0;
}
}
@ -528,7 +409,7 @@ public class Token extends AttributeImpl
corresponding to this token in the source text.
Note that the difference between endOffset() and startOffset() may not be
equal to termText.length(), as the term text may have been altered by a
equal to {@link #termLength}, as the term text may have been altered by a
stemmer or some other filter. */
public final int startOffset() {
return startOffset;
@ -630,7 +511,6 @@ public class Token extends AttributeImpl
payload = null;
// Leave termBuffer to allow re-use
termLength = 0;
termText = null;
positionIncrement = 1;
flags = 0;
startOffset = endOffset = 0;

View File

@ -150,7 +150,7 @@ public class TestToken extends LuceneTestCase {
t.setTermBuffer(b, 0, 5);
assertEquals("(aloha,0,5)", t.toString());
t.setTermText("hi there");
t.setTermBuffer("hi there");
assertEquals("(hi there,0,5)", t.toString());
}
@ -171,20 +171,17 @@ public class TestToken extends LuceneTestCase {
public void testMixedStringArray() throws Exception {
Token t = new Token("hello", 0, 5);
assertEquals(t.termText(), "hello");
assertEquals(t.termLength(), 5);
assertEquals(t.term(), "hello");
t.setTermText("hello2");
t.setTermBuffer("hello2");
assertEquals(t.termLength(), 6);
assertEquals(t.term(), "hello2");
t.setTermBuffer("hello3".toCharArray(), 0, 6);
assertEquals(t.termText(), "hello3");
assertEquals(t.term(), "hello3");
// Make sure if we get the buffer and change a character
// that termText() reflects the change
char[] buffer = t.termBuffer();
buffer[1] = 'o';
assertEquals(t.termText(), "hollo3");
assertEquals(t.term(), "hollo3");
}
public void testClone() throws Exception {