LUCENE-2372: Convert core analyzers to CharTermAttribute. Also made the rest of the core analyzers final.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@932749 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2010-04-10 15:41:27 +00:00
parent 06283e9f63
commit d02cbfe3c1
45 changed files with 258 additions and 385 deletions

View File

@ -97,6 +97,10 @@ Changes in backwards compatibility policy
TODO: Point to new attribute inspection API coming with LUCENE-2374.
(Uwe Schindler, Robert Muir)
* LUCENE-2372: StandardAnalyzer, KeywordAnalyzer, PerFieldAnalyzerWrapper
are now final. Also removed the now obsolete and deprecated
Analyzer.setOverridesTokenStreamMethod(). (Uwe Schindler)
Changes in runtime behavior
* LUCENE-1923: Made IndexReader.toString() produce something

View File

@ -120,6 +120,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
String[] y = StandardTokenizer.TOKEN_TYPES;
}
/* StandardAnalyzer was made final in 3.1:
private static class MyStandardAnalyzer extends StandardAnalyzer {
public MyStandardAnalyzer() {
super(org.apache.lucene.util.Version.LUCENE_CURRENT);
@ -139,6 +140,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
assertTrue(ts.incrementToken());
assertFalse(ts.incrementToken());
}
*/
}
class PayloadSetter extends TokenFilter {

View File

@ -19,7 +19,7 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
@ -61,18 +61,17 @@ public final class ASCIIFoldingFilter extends TokenFilter {
public ASCIIFoldingFilter(TokenStream input)
{
super(input);
termAtt = addAttribute(TermAttribute.class);
}
private char[] output = new char[512];
private int outputPos;
private TermAttribute termAtt;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
final char[] buffer = termAtt.termBuffer();
final int length = termAtt.termLength();
final char[] buffer = termAtt.buffer();
final int length = termAtt.length();
// If no characters actually require rewriting then we
// just return token as-is:
@ -81,7 +80,7 @@ public final class ASCIIFoldingFilter extends TokenFilter {
if (c >= '\u0080')
{
foldToASCII(buffer, length);
termAtt.setTermBuffer(output, 0, outputPos);
termAtt.copyBuffer(output, 0, outputPos);
break;
}
}
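The same migration repeats throughout this commit: the attribute becomes an initialized final field, termBuffer()/termLength() become buffer()/length(), and setTermBuffer() becomes copyBuffer(). As a consolidated illustration, here is a minimal self-contained filter written against the new API; the class itself is a hypothetical sketch, not part of the commit, and uses only calls visible in the hunks above.

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    /** Hypothetical example filter: upper-cases each token in place via CharTermAttribute. */
    public final class UpperCaseExampleFilter extends TokenFilter {
      // attributes are now initialized final fields, as in the filters of this commit
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

      public UpperCaseExampleFilter(TokenStream input) {
        super(input);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
          return false;
        }
        final char[] buffer = termAtt.buffer();  // was: termAtt.termBuffer()
        final int length = termAtt.length();     // was: termAtt.termLength()
        for (int i = 0; i < length; i++) {
          // naive per-char upper-casing; ignores supplementary code points
          buffer[i] = Character.toUpperCase(buffer[i]);
        }
        return true;
      }
    }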

View File

@ -84,28 +84,6 @@ public abstract class Analyzer implements Closeable {
}
}
private static final VirtualMethod<Analyzer> tokenStreamMethod =
new VirtualMethod<Analyzer>(Analyzer.class, "tokenStream", String.class, Reader.class);
private static final VirtualMethod<Analyzer> reusableTokenStreamMethod =
new VirtualMethod<Analyzer>(Analyzer.class, "reusableTokenStream", String.class, Reader.class);
/** This field indicates whether the {@link #tokenStream} method was overridden in a
* more distant subclass of {@code Analyzer} on the current instance's inheritance path.
* If this field is {@code true}, {@link #reusableTokenStream} should delegate to {@link #tokenStream}
* instead of using the own implementation.
* @deprecated Please declare all implementations of {@link #reusableTokenStream} and {@link #tokenStream}
* as {@code final}.
*/
@Deprecated
protected final boolean overridesTokenStreamMethod =
VirtualMethod.compareImplementationDistance(this.getClass(), tokenStreamMethod, reusableTokenStreamMethod) > 0;
/** @deprecated This is a no-op since Lucene 3.1. */
@Deprecated
protected void setOverridesTokenStreamMethod(Class<? extends Analyzer> baseClass) {
}
/**
* Invoked before indexing a Fieldable instance if
* terms have already been added to that field. This allows custom

View File

@ -21,7 +21,7 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CharacterUtils;
import org.apache.lucene.util.Version;
@ -78,10 +78,7 @@ public abstract class CharTokenizer extends Tokenizer {
public CharTokenizer(Version matchVersion, Reader input) {
super(input);
charUtils = CharacterUtils.getInstance(matchVersion);
offsetAtt = addAttribute(OffsetAttribute.class);
termAtt = addAttribute(TermAttribute.class);
useOldAPI = useOldAPI(matchVersion);
ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
}
@ -99,10 +96,7 @@ public abstract class CharTokenizer extends Tokenizer {
Reader input) {
super(source, input);
charUtils = CharacterUtils.getInstance(matchVersion);
offsetAtt = addAttribute(OffsetAttribute.class);
termAtt = addAttribute(TermAttribute.class);
useOldAPI = useOldAPI(matchVersion);
ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
}
/**
@ -119,10 +113,7 @@ public abstract class CharTokenizer extends Tokenizer {
Reader input) {
super(factory, input);
charUtils = CharacterUtils.getInstance(matchVersion);
offsetAtt = addAttribute(OffsetAttribute.class);
termAtt = addAttribute(TermAttribute.class);
useOldAPI = useOldAPI(matchVersion);
ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
}
/**
@ -164,11 +155,11 @@ public abstract class CharTokenizer extends Tokenizer {
private static final int MAX_WORD_LEN = 255;
private static final int IO_BUFFER_SIZE = 4096;
private final TermAttribute termAtt;
private final OffsetAttribute offsetAtt;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final CharacterUtils charUtils;
private final CharacterBuffer ioBuffer;
private final CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
/**
* @deprecated this will be removed in lucene 4.0
@ -275,7 +266,7 @@ public abstract class CharTokenizer extends Tokenizer {
return incrementTokenOld();
int length = 0;
int start = bufferIndex;
char[] buffer = termAtt.termBuffer();
char[] buffer = termAtt.buffer();
while (true) {
if (bufferIndex >= dataLen) {
offset += dataLen;
@ -297,7 +288,7 @@ public abstract class CharTokenizer extends Tokenizer {
if (length == 0) // start of token
start = offset + bufferIndex - 1;
else if (length >= buffer.length-1) // check if a supplementary could run out of bounds
buffer = termAtt.resizeTermBuffer(2+length); // make sure a supplementary fits in the buffer
buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer
length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized
if (length >= MAX_WORD_LEN) // buffer overflow! make sure to check for >= surrogate pair could break == test
break;
@ -305,7 +296,7 @@ public abstract class CharTokenizer extends Tokenizer {
break; // return 'em
}
termAtt.setTermLength(length);
termAtt.setLength(length);
offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
return true;
@ -320,7 +311,7 @@ public abstract class CharTokenizer extends Tokenizer {
private boolean incrementTokenOld() throws IOException {
int length = 0;
int start = bufferIndex;
char[] buffer = termAtt.termBuffer();
char[] buffer = termAtt.buffer();
final char[] oldIoBuffer = ioBuffer.getBuffer();
while (true) {
@ -344,7 +335,7 @@ public abstract class CharTokenizer extends Tokenizer {
if (length == 0) // start of token
start = offset + bufferIndex - 1;
else if (length == buffer.length)
buffer = termAtt.resizeTermBuffer(1+length);
buffer = termAtt.resizeBuffer(1+length);
buffer[length++] = normalize(c); // buffer it, normalized
@ -355,7 +346,7 @@ public abstract class CharTokenizer extends Tokenizer {
break; // return 'em
}
termAtt.setTermLength(length);
termAtt.setLength(length);
offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
return true;
}
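Under the new API shown above, subclasses work on full code points: isTokenChar(int) decides token membership and the result of normalize(int) is appended via Character.toChars(). A minimal sketch of such a subclass, assuming those protected signatures; the class name and chain are illustrative, not from the commit.

    import java.io.Reader;

    import org.apache.lucene.analysis.CharTokenizer;
    import org.apache.lucene.util.Version;

    /** Hypothetical tokenizer: emits runs of letters or digits, lower-cased. */
    public final class AlphanumTokenizer extends CharTokenizer {
      public AlphanumTokenizer(Version matchVersion, Reader input) {
        super(matchVersion, input);
      }

      @Override
      protected boolean isTokenChar(int c) {  // code-point based in the new API
        return Character.isLetterOrDigit(c);
      }

      @Override
      protected int normalize(int c) {        // result is appended via Character.toChars()
        return Character.toLowerCase(c);
      }
    }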

View File

@ -1,7 +1,5 @@
package org.apache.lucene.analysis;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -19,6 +17,8 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
* limitations under the License.
*/
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A filter that replaces accented characters in the ISO Latin 1 character set
* (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
@ -35,25 +35,24 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
public final class ISOLatin1AccentFilter extends TokenFilter {
public ISOLatin1AccentFilter(TokenStream input) {
super(input);
termAtt = addAttribute(TermAttribute.class);
}
private char[] output = new char[256];
private int outputPos;
private TermAttribute termAtt;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@Override
public final boolean incrementToken() throws java.io.IOException {
if (input.incrementToken()) {
final char[] buffer = termAtt.termBuffer();
final int length = termAtt.termLength();
final char[] buffer = termAtt.buffer();
final int length = termAtt.length();
// If no characters actually require rewriting then we
// just return token as-is:
for(int i=0;i<length;i++) {
final char c = buffer[i];
if (c >= '\u00c0' && c <= '\uFB06') {
removeAccents(buffer, length);
termAtt.setTermBuffer(output, 0, outputPos);
termAtt.copyBuffer(output, 0, outputPos);
break;
}
}

View File

@ -17,36 +17,18 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
/**
* "Tokenizes" the entire stream as a single token. This is useful
* for data like zip codes, ids, and some product names.
*/
public class KeywordAnalyzer extends Analyzer {
public final class KeywordAnalyzer extends ReusableAnalyzerBase {
public KeywordAnalyzer() {
}
@Override
public TokenStream tokenStream(String fieldName,
final Reader reader) {
return new KeywordTokenizer(reader);
}
@Override
public TokenStream reusableTokenStream(String fieldName,
final Reader reader) throws IOException {
if (overridesTokenStreamMethod) {
// LUCENE-1678: force fallback to tokenStream() if we
// have been subclassed and that subclass overrides
// tokenStream but not reusableTokenStream
return tokenStream(fieldName, reader);
}
Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
if (tokenizer == null) {
tokenizer = new KeywordTokenizer(reader);
setPreviousTokenStream(tokenizer);
} else
tokenizer.reset(reader);
return tokenizer;
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
return new TokenStreamComponents(new KeywordTokenizer(reader));
}
}
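With the reuse plumbing moved into ReusableAnalyzerBase, an analyzer only declares its chain once in createComponents(). A hedged sketch of the same pattern for a custom analyzer; the class and the filter chain are illustrative, not part of the commit.

    import java.io.Reader;

    import org.apache.lucene.analysis.KeywordTokenizer;
    import org.apache.lucene.analysis.LowerCaseFilter;
    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.util.Version;

    /** Hypothetical analyzer: the entire field value as one lower-cased token. */
    public final class LowerCaseKeywordAnalyzer extends ReusableAnalyzerBase {
      private final Version matchVersion;

      public LowerCaseKeywordAnalyzer(Version matchVersion) {
        this.matchVersion = matchVersion;
      }

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new KeywordTokenizer(reader);
        return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, source));
      }
    }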

View File

@ -21,7 +21,7 @@ import java.io.IOException;
import java.util.Set;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
/**
@ -33,8 +33,8 @@ import org.apache.lucene.util.Version;
*/
public final class KeywordMarkerTokenFilter extends TokenFilter {
private final KeywordAttribute keywordAttr;
private final TermAttribute termAtt;
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final CharArraySet keywordSet;
/**
@ -50,8 +50,6 @@ public final class KeywordMarkerTokenFilter extends TokenFilter {
public KeywordMarkerTokenFilter(final TokenStream in,
final CharArraySet keywordSet) {
super(in);
termAtt = addAttribute(TermAttribute.class);
keywordAttr = addAttribute(KeywordAttribute.class);
this.keywordSet = keywordSet;
}
@ -73,8 +71,8 @@ public final class KeywordMarkerTokenFilter extends TokenFilter {
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
keywordAttr.setKeyword(keywordSet.contains(termAtt.termBuffer(), 0,
termAtt.termLength()));
keywordAttr.setKeyword(keywordSet.contains(termAtt.buffer(), 0,
termAtt.length()));
return true;
} else
return false;
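Usage sketch: terms in the keyword set are marked via KeywordAttribute so keyword-aware consumers further down the chain (for example PorterStemFilter, below) leave them unstemmed. The set contents, input, and Version constant are illustrative assumptions.

    import java.io.StringReader;

    import org.apache.lucene.analysis.CharArraySet;
    import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
    import org.apache.lucene.analysis.PorterStemFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class KeywordMarkerExample {
      public static void main(String[] args) throws Exception {
        // "lucene" keeps its original form; everything else gets stemmed
        CharArraySet protectedTerms = new CharArraySet(Version.LUCENE_31, 16, true);
        protectedTerms.add("lucene");

        TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_31,
            new StringReader("lucene indexing"));
        ts = new KeywordMarkerTokenFilter(ts, protectedTerms);
        ts = new PorterStemFilter(ts);

        CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
        while (ts.incrementToken()) {
          System.out.println(termAtt.toString()); // "lucene" unchanged, "indexing" stemmed
        }
      }
    }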

View File

@ -21,7 +21,7 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;
/**
@ -31,10 +31,10 @@ public final class KeywordTokenizer extends Tokenizer {
private static final int DEFAULT_BUFFER_SIZE = 256;
private boolean done;
private boolean done = false;
private int finalOffset;
private TermAttribute termAtt;
private OffsetAttribute offsetAtt;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public KeywordTokenizer(Reader input) {
this(input, DEFAULT_BUFFER_SIZE);
@ -42,24 +42,17 @@ public final class KeywordTokenizer extends Tokenizer {
public KeywordTokenizer(Reader input, int bufferSize) {
super(input);
init(bufferSize);
termAtt.resizeBuffer(bufferSize);
}
public KeywordTokenizer(AttributeSource source, Reader input, int bufferSize) {
super(source, input);
init(bufferSize);
termAtt.resizeBuffer(bufferSize);
}
public KeywordTokenizer(AttributeFactory factory, Reader input, int bufferSize) {
super(factory, input);
init(bufferSize);
}
private void init(int bufferSize) {
this.done = false;
termAtt = addAttribute(TermAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
termAtt.resizeTermBuffer(bufferSize);
termAtt.resizeBuffer(bufferSize);
}
@Override
@ -68,15 +61,15 @@ public final class KeywordTokenizer extends Tokenizer {
clearAttributes();
done = true;
int upto = 0;
char[] buffer = termAtt.termBuffer();
char[] buffer = termAtt.buffer();
while (true) {
final int length = input.read(buffer, upto, buffer.length-upto);
if (length == -1) break;
upto += length;
if (upto == buffer.length)
buffer = termAtt.resizeTermBuffer(1+buffer.length);
buffer = termAtt.resizeBuffer(1+buffer.length);
}
termAtt.setTermLength(upto);
termAtt.setLength(upto);
finalOffset = correctOffset(upto);
offsetAtt.setOffset(correctOffset(0), finalOffset);
return true;
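A short usage sketch of the behavior above: the whole input surfaces as exactly one token, which is what makes this tokenizer suit zip codes and ids. Identifiers and input are illustrative.

    import java.io.StringReader;

    import org.apache.lucene.analysis.KeywordTokenizer;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class KeywordTokenizerExample {
      public static void main(String[] args) throws Exception {
        Tokenizer tok = new KeywordTokenizer(new StringReader("90210-1234"));
        CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
        while (tok.incrementToken()) {
          System.out.println(term.toString()); // prints the single token "90210-1234"
        }
        tok.end();
        tok.close();
      }
    }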

View File

@ -19,17 +19,17 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Removes words that are too long or too short from the stream.
*/
public final class LengthFilter extends TokenFilter {
final int min;
final int max;
private final int min;
private final int max;
private TermAttribute termAtt;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* Build a filter that removes words that are too long or too
@ -40,7 +40,6 @@ public final class LengthFilter extends TokenFilter {
super(in);
this.min = min;
this.max = max;
termAtt = addAttribute(TermAttribute.class);
}
/**
@ -50,7 +49,7 @@ public final class LengthFilter extends TokenFilter {
public final boolean incrementToken() throws IOException {
// return the first non-stop word found
while (input.incrementToken()) {
int len = termAtt.termLength();
int len = termAtt.length();
if (len >= min && len <= max) {
return true;
}

View File

@ -19,7 +19,7 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.CharacterUtils;
import org.apache.lucene.util.Version;
@ -34,7 +34,8 @@ import org.apache.lucene.util.Version;
*/
public final class LowerCaseFilter extends TokenFilter {
private final CharacterUtils charUtils;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* Create a new LowerCaseFilter, that normalizes token text to lower case.
*
@ -43,7 +44,6 @@ public final class LowerCaseFilter extends TokenFilter {
*/
public LowerCaseFilter(Version matchVersion, TokenStream in) {
super(in);
termAtt = addAttribute(TermAttribute.class);
charUtils = CharacterUtils.getInstance(matchVersion);
}
@ -55,13 +55,11 @@ public final class LowerCaseFilter extends TokenFilter {
this(Version.LUCENE_30, in);
}
private TermAttribute termAtt;
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
final char[] buffer = termAtt.termBuffer();
final int length = termAtt.termLength();
final char[] buffer = termAtt.buffer();
final int length = termAtt.length();
for (int i = 0; i < length;) {
i += Character.toChars(
Character.toLowerCase(

View File

@ -26,7 +26,6 @@ import org.apache.lucene.document.NumericField; // for javadocs
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -118,11 +117,14 @@ public final class NumericTokenStream extends TokenStream {
this.delegate = delegate;
}
@Override
@Override @SuppressWarnings("deprecation")
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
if (attClass == NumericTermAttribute.class)
return new NumericTermAttributeImpl(ts);
if (attClass.isAssignableFrom(CharTermAttribute.class) || attClass.isAssignableFrom(TermAttribute.class))
if (attClass.isAssignableFrom(CharTermAttribute.class) ||
// TODO: remove in 4.0 (deprecated class, also remove the suppress above):
attClass.isAssignableFrom(org.apache.lucene.analysis.tokenattributes.TermAttribute.class)
)
throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute/TermAttribute.");
return delegate.createAttributeInstance(attClass);
}

View File

@ -44,7 +44,7 @@ import java.util.HashMap;
* <p>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
* and query parsing.
*/
public class PerFieldAnalyzerWrapper extends Analyzer {
public final class PerFieldAnalyzerWrapper extends Analyzer {
private Analyzer defaultAnalyzer;
private Map<String,Analyzer> analyzerMap = new HashMap<String,Analyzer>();
@ -99,12 +99,6 @@ public class PerFieldAnalyzerWrapper extends Analyzer {
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
if (overridesTokenStreamMethod) {
// LUCENE-1678: force fallback to tokenStream() if we
// have been subclassed and that subclass overrides
// tokenStream but not reusableTokenStream
return tokenStream(fieldName, reader);
}
Analyzer analyzer = analyzerMap.get(fieldName);
if (analyzer == null)
analyzer = defaultAnalyzer;
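Usage sketch: route one field to KeywordAnalyzer while every other field falls through to the default analyzer. The field name is made up; addAnalyzer() is the existing 3.x API.

    import org.apache.lucene.analysis.KeywordAnalyzer;
    import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.util.Version;

    public class PerFieldExample {
      public static void main(String[] args) {
        PerFieldAnalyzerWrapper analyzer =
            new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(Version.LUCENE_31));
        // "zipcode" is indexed as one token; all other fields use the default
        analyzer.addAnalyzer("zipcode", new KeywordAnalyzer());
      }
    }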

View File

@ -20,7 +20,7 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/** Transforms the token stream as per the Porter stemming algorithm.
Note: the input to the stemming filter must already be in lower case,
@ -47,15 +47,12 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
</p>
*/
public final class PorterStemFilter extends TokenFilter {
private final PorterStemmer stemmer;
private final TermAttribute termAtt;
private final KeywordAttribute keywordAttr;
private final PorterStemmer stemmer = new PorterStemmer();
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public PorterStemFilter(TokenStream in) {
super(in);
stemmer = new PorterStemmer();
termAtt = addAttribute(TermAttribute.class);
keywordAttr = addAttribute(KeywordAttribute.class);
}
@Override
@ -63,8 +60,8 @@ public final class PorterStemFilter extends TokenFilter {
if (!input.incrementToken())
return false;
if ((!keywordAttr.isKeyword()) && stemmer.stem(termAtt.termBuffer(), 0, termAtt.termLength()))
termAtt.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
if ((!keywordAttr.isKeyword()) && stemmer.stem(termAtt.buffer(), 0, termAtt.length()))
termAtt.copyBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
return true;
}
}
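The javadoc above stresses that the stemmer must see lower-cased input. A sketch of the recommended ordering; the helper method is illustrative.

    import java.io.Reader;

    import org.apache.lucene.analysis.LowerCaseFilter;
    import org.apache.lucene.analysis.PorterStemFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.WhitespaceTokenizer;
    import org.apache.lucene.util.Version;

    public class PorterChainExample {
      /** Tokenize, lower-case, then stem, in that order. */
      static TokenStream porterChain(Version matchVersion, Reader reader) {
        TokenStream ts = new WhitespaceTokenizer(matchVersion, reader);
        ts = new LowerCaseFilter(matchVersion, ts); // stemmer requires lower-case input
        return new PorterStemFilter(ts);
      }
    }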

View File

@ -100,8 +100,8 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
* {@link Analyzer#reusableTokenStream(String, Reader)}.
*/
public static class TokenStreamComponents {
final Tokenizer source;
final TokenStream sink;
protected final Tokenizer source;
protected final TokenStream sink;
/**
* Creates a new {@link TokenStreamComponents} instance.

View File

@ -23,7 +23,7 @@ import java.util.Set;
import java.util.List;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.queryParser.QueryParser; // for javadoc
import org.apache.lucene.util.Version;
@ -44,8 +44,8 @@ public final class StopFilter extends TokenFilter {
private final CharArraySet stopWords;
private boolean enablePositionIncrements = false;
private TermAttribute termAtt;
private PositionIncrementAttribute posIncrAtt;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
/**
* Construct a token stream filtering the given input.
@ -104,8 +104,6 @@ public final class StopFilter extends TokenFilter {
super(input);
this.stopWords = stopWords instanceof CharArraySet ? (CharArraySet)stopWords : new CharArraySet(matchVersion, stopWords, ignoreCase);
this.enablePositionIncrements = enablePositionIncrements;
termAtt = addAttribute(TermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
}
/**
@ -257,7 +255,7 @@ public final class StopFilter extends TokenFilter {
// return the first non-stop word found
int skippedPositions = 0;
while (input.incrementToken()) {
if (!stopWords.contains(termAtt.termBuffer(), 0, termAtt.termLength())) {
if (!stopWords.contains(termAtt.buffer(), 0, termAtt.length())) {
if (enablePositionIncrements) {
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
}
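Usage sketch, mirroring the tests later in this commit: build the stop set with makeStopSet() and let position increments record the removed words. The Version constant and input are illustrative.

    import java.io.StringReader;
    import java.util.Set;

    import org.apache.lucene.analysis.StopFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class StopFilterExample {
      public static void main(String[] args) throws Exception {
        Set<Object> stopSet = StopFilter.makeStopSet(Version.LUCENE_31, "is", "the");
        TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_31,
            new StringReader("Now is the Time"));
        ts = new StopFilter(Version.LUCENE_31, ts, stopSet);
        CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
        while (ts.incrementToken()) {
          System.out.println(termAtt.toString()); // "Now", "Time"
        }
      }
    }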

View File

@ -42,8 +42,12 @@ import java.util.Set;
* are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
* </ul>
*/
public class StandardAnalyzer extends Analyzer {
private Set<?> stopSet;
public final class StandardAnalyzer extends StopwordAnalyzerBase {
/** Default maximum allowed token length */
public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
/**
* Specifies whether deprecated acronyms should be replaced with HOST type.
@ -54,7 +58,15 @@ public class StandardAnalyzer extends Analyzer {
/** An unmodifiable set containing some common English words that are usually not
useful for searching. */
public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
private final Version matchVersion;
/** Builds an analyzer with the given stop words.
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
* @param stopWords stop words */
public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
super(matchVersion, stopWords);
replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
}
/** Builds an analyzer with the default stop words ({@link
* #STOP_WORDS_SET}).
@ -65,16 +77,6 @@ public class StandardAnalyzer extends Analyzer {
this(matchVersion, STOP_WORDS_SET);
}
/** Builds an analyzer with the given stop words.
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
* @param stopWords stop words */
public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
stopSet = stopWords;
replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
this.matchVersion = matchVersion;
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
* @param matchVersion Lucene version to match See {@link
@ -93,28 +95,6 @@ public class StandardAnalyzer extends Analyzer {
this(matchVersion, WordlistLoader.getWordSet(stopwords));
}
/** Constructs a {@link StandardTokenizer} filtered by a {@link
StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
tokenStream.setMaxTokenLength(maxTokenLength);
TokenStream result = new StandardFilter(tokenStream);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopSet);
return result;
}
private static final class SavedStreams {
StandardTokenizer tokenStream;
TokenStream filteredTokenStream;
}
/** Default maximum allowed token length */
public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
/**
* Set maximum allowed token length. If a token is seen
* that exceeds this length then it is discarded. This
@ -133,29 +113,19 @@ public class StandardAnalyzer extends Analyzer {
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
if (overridesTokenStreamMethod) {
// LUCENE-1678: force fallback to tokenStream() if we
// have been subclassed and that subclass overrides
// tokenStream but not reusableTokenStream
return tokenStream(fieldName, reader);
}
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
setPreviousTokenStream(streams);
streams.tokenStream = new StandardTokenizer(matchVersion, reader);
streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
streams.filteredTokenStream = new LowerCaseFilter(matchVersion,
streams.filteredTokenStream);
streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopSet);
} else {
streams.tokenStream.reset(reader);
}
streams.tokenStream.setMaxTokenLength(maxTokenLength);
streams.tokenStream.setReplaceInvalidAcronym(replaceInvalidAcronym);
return streams.filteredTokenStream;
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
src.setMaxTokenLength(maxTokenLength);
src.setReplaceInvalidAcronym(replaceInvalidAcronym);
TokenStream tok = new StandardFilter(src);
tok = new LowerCaseFilter(matchVersion, tok);
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected boolean reset(final Reader reader) throws IOException {
src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
return super.reset(reader);
}
};
}
}
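From the caller's perspective, usage is unchanged by the new base class; a minimal consumption loop using CharTermAttribute, as a sketch (field name and input are illustrative).

    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class StandardAnalyzerExample {
      public static void main(String[] args) throws Exception {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        TokenStream ts = analyzer.reusableTokenStream("body",
            new StringReader("The Quick Brown Fox"));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(termAtt.toString()); // quick, brown, fox ("the" is a stop word)
        }
        ts.end();
        ts.close();
      }
    }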

View File

@ -19,27 +19,24 @@ package org.apache.lucene.analysis.standard;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/** Normalizes tokens extracted with {@link StandardTokenizer}. */
public final class StandardFilter extends TokenFilter {
/** Construct filtering <i>in</i>. */
public StandardFilter(TokenStream in) {
super(in);
termAtt = addAttribute(TermAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
}
private static final String APOSTROPHE_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.APOSTROPHE];
private static final String ACRONYM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ACRONYM];
// this filter uses the attribute type
private final TypeAttribute typeAtt;
private final TermAttribute termAtt;
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/** Returns the next token in the stream, or null at EOS.
* <p>Removes <tt>'s</tt> from the end of words.
@ -51,16 +48,16 @@ public final class StandardFilter extends TokenFilter {
return false;
}
char[] buffer = termAtt.termBuffer();
final int bufferLength = termAtt.termLength();
final char[] buffer = termAtt.buffer();
final int bufferLength = termAtt.length();
final String type = typeAtt.type();
if (type == APOSTROPHE_TYPE && // remove 's
bufferLength >= 2 &&
bufferLength >= 2 &&
buffer[bufferLength-2] == '\'' &&
(buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
// Strip last 2 characters off
termAtt.setTermLength(bufferLength - 2);
termAtt.setLength(bufferLength - 2);
} else if (type == ACRONYM_TYPE) { // remove dots
int upto = 0;
for(int i=0;i<bufferLength;i++) {
@ -68,7 +65,7 @@ public final class StandardFilter extends TokenFilter {
if (c != '.')
buffer[upto++] = c;
}
termAtt.setTermLength(upto);
termAtt.setLength(upto);
}
return true;

View File

@ -20,7 +20,7 @@ package org.apache.lucene.collation;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.IndexableBinaryStringTools;
import java.io.IOException;
@ -73,8 +73,8 @@ import java.text.Collator;
* </p>
*/
public final class CollationKeyFilter extends TokenFilter {
private Collator collator = null;
private TermAttribute termAtt;
private final Collator collator;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* @param input Source token stream
@ -83,23 +83,18 @@ public final class CollationKeyFilter extends TokenFilter {
public CollationKeyFilter(TokenStream input, Collator collator) {
super(input);
this.collator = collator;
termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
char[] termBuffer = termAtt.termBuffer();
String termText = new String(termBuffer, 0, termAtt.termLength());
byte[] collationKey = collator.getCollationKey(termText).toByteArray();
byte[] collationKey = collator.getCollationKey(termAtt.toString()).toByteArray();
int encodedLength = IndexableBinaryStringTools.getEncodedLength(
collationKey, 0, collationKey.length);
if (encodedLength > termBuffer.length) {
termAtt.resizeTermBuffer(encodedLength);
}
termAtt.setTermLength(encodedLength);
termAtt.resizeBuffer(encodedLength);
termAtt.setLength(encodedLength);
IndexableBinaryStringTools.encode(collationKey, 0, collationKey.length,
termAtt.termBuffer(), 0, encodedLength);
termAtt.buffer(), 0, encodedLength);
return true;
} else {
return false;

View File

@ -17,7 +17,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
@ -522,7 +522,7 @@ public class QueryParser implements QueryParserConstants {
source = analyzer.tokenStream(field, new StringReader(queryText));
}
CachingTokenFilter buffer = new CachingTokenFilter(source);
TermAttribute termAtt = null;
CharTermAttribute termAtt = null;
PositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
@ -534,8 +534,8 @@ public class QueryParser implements QueryParserConstants {
// success==false if we hit an exception
}
if (success) {
if (buffer.hasAttribute(TermAttribute.class)) {
termAtt = buffer.getAttribute(TermAttribute.class);
if (buffer.hasAttribute(CharTermAttribute.class)) {
termAtt = buffer.getAttribute(CharTermAttribute.class);
}
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
@ -581,7 +581,7 @@ public class QueryParser implements QueryParserConstants {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@ -596,7 +596,7 @@ public class QueryParser implements QueryParserConstants {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@ -619,7 +619,7 @@ public class QueryParser implements QueryParserConstants {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
@ -659,7 +659,7 @@ public class QueryParser implements QueryParserConstants {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
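The parser buffers the analyzed query once and then replays it from the cache; a condensed sketch of that pattern (variable names follow the hunk above, the surrounding class and helper are hypothetical).

    import java.io.StringReader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.CachingTokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class AnalyzedQueryTerms {
      static int countAndReplay(Analyzer analyzer, String field, String queryText) throws Exception {
        TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
        CachingTokenFilter buffer = new CachingTokenFilter(source);

        CharTermAttribute termAtt = null;
        if (buffer.hasAttribute(CharTermAttribute.class)) {
          termAtt = buffer.getAttribute(CharTermAttribute.class);
        }

        int numTokens = 0;
        while (buffer.incrementToken()) {   // first pass fills the cache
          numTokens++;
        }

        buffer.reset();                     // rewind the cache for the real pass
        while (buffer.incrementToken()) {
          if (termAtt == null) continue;    // stream without a term attribute
          String term = termAtt.toString(); // was termAtt.term() before this commit
          // ... build a TermQuery or PhraseQuery from term ...
        }
        return numTokens;
      }
    }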

View File

@ -41,7 +41,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
@ -546,7 +546,7 @@ public class QueryParser {
source = analyzer.tokenStream(field, new StringReader(queryText));
}
CachingTokenFilter buffer = new CachingTokenFilter(source);
TermAttribute termAtt = null;
CharTermAttribute termAtt = null;
PositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
@ -558,8 +558,8 @@ public class QueryParser {
// success==false if we hit an exception
}
if (success) {
if (buffer.hasAttribute(TermAttribute.class)) {
termAtt = buffer.getAttribute(TermAttribute.class);
if (buffer.hasAttribute(CharTermAttribute.class)) {
termAtt = buffer.getAttribute(CharTermAttribute.class);
}
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
@ -605,7 +605,7 @@ public class QueryParser {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@ -620,7 +620,7 @@ public class QueryParser {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@ -643,7 +643,7 @@ public class QueryParser {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
@ -683,7 +683,7 @@ public class QueryParser {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}

View File

@ -15,7 +15,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;

View File

@ -28,7 +28,7 @@ import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.TermFreqVector;
/**
@ -61,11 +61,11 @@ public class QueryTermVector implements TermFreqVector {
boolean hasMoreTokens = false;
stream.reset();
TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
hasMoreTokens = stream.incrementToken();
while (hasMoreTokens) {
terms.add(termAtt.term());
terms.add(termAtt.toString());
hasMoreTokens = stream.incrementToken();
}
processTerms(terms.toArray(new String[terms.size()]));

View File

@ -83,8 +83,8 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
assertNotNull(output);
CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
assertTrue("has no TermAttribute", ts.hasAttribute(TermAttribute.class));
TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = null;
if (startOffsets != null || endOffsets != null || finalOffset != null) {
@ -108,7 +108,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
for (int i = 0; i < output.length; i++) {
// extra safety to enforce, that the state is not preserved and also assign bogus values
ts.clearAttributes();
termAtt.setTermBuffer("bogusTerm");
termAtt.setEmpty().append("bogusTerm");
if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
if (typeAtt != null) typeAtt.setType("bogusType");
if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
@ -117,7 +117,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
assertTrue("token "+i+" does not exist", ts.incrementToken());
assertTrue("clearAttributes() was not called correctly in TokenStream chain", checkClearAtt.getAndResetClearCalled());
assertEquals("term "+i, output[i], termAtt.term());
assertEquals("term "+i, output[i], termAtt.toString());
if (startOffsets != null)
assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset());
if (endOffsets != null)

View File

@ -17,7 +17,7 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
import java.util.List;
import java.util.ArrayList;
@ -33,7 +33,7 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase {
+" ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl"));
ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
assertTermEquals("Des", filter, termAtt);
assertTermEquals("mot", filter, termAtt);
@ -1890,7 +1890,7 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase {
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(inputText.toString()));
ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
Iterator<String> expectedIter = expectedOutputTokens.iterator();
while (expectedIter.hasNext()) {
assertTermEquals(expectedIter.next(), filter, termAtt);
@ -1898,8 +1898,8 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase {
assertFalse(filter.incrementToken());
}
void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt) throws Exception {
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt) throws Exception {
assertTrue(stream.incrementToken());
assertEquals(expected, termAtt.term());
assertEquals(expected, termAtt.toString());
}
}

View File

@ -24,7 +24,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Payload;
public class TestAnalyzers extends BaseTokenStreamTestCase {
@ -120,26 +120,6 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
String[] y = StandardTokenizer.TOKEN_TYPES;
}
private static class MyStandardAnalyzer extends StandardAnalyzer {
public MyStandardAnalyzer() {
super(TEST_VERSION_CURRENT);
}
@Override
public TokenStream tokenStream(String field, Reader reader) {
return new WhitespaceAnalyzer(TEST_VERSION_CURRENT).tokenStream(field, reader);
}
}
public void testSubclassOverridingOnlyTokenStream() throws Throwable {
Analyzer a = new MyStandardAnalyzer();
TokenStream ts = a.reusableTokenStream("field", new StringReader("the"));
// StandardAnalyzer will discard "the" (it's a
// stopword), but my subclass will not:
assertTrue(ts.incrementToken());
assertFalse(ts.incrementToken());
}
private static class LowerCaseWhitespaceAnalyzer extends Analyzer {
@Override
@ -202,8 +182,8 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
String highSurEndingLower = "bogustermboguster\ud801";
tokenizer.reset(new StringReader(highSurEndingUpper));
assertTokenStreamContents(filter, new String[] {highSurEndingLower});
assertTrue(filter.hasAttribute(TermAttribute.class));
char[] termBuffer = filter.getAttribute(TermAttribute.class).termBuffer();
assertTrue(filter.hasAttribute(CharTermAttribute.class));
char[] termBuffer = filter.getAttribute(CharTermAttribute.class).buffer();
int length = highSurEndingLower.length();
assertEquals('\ud801', termBuffer[length - 1]);
assertEquals('\udc3e', termBuffer[length]);

View File

@ -21,7 +21,7 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.TermVector;
@ -43,7 +43,7 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase {
Document doc = new Document();
TokenStream stream = new TokenStream() {
private int index = 0;
private TermAttribute termAtt = addAttribute(TermAttribute.class);
private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@Override
@ -52,7 +52,7 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase {
return false;
} else {
clearAttributes();
termAtt.setTermBuffer(tokens[index++]);
termAtt.append(tokens[index++]);
offsetAtt.setOffset(0,0);
return true;
}
@ -100,11 +100,10 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase {
private void checkTokens(TokenStream stream) throws IOException {
int count = 0;
TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
assertNotNull(termAtt);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
while (stream.incrementToken()) {
assertTrue(count < tokens.length);
assertEquals(tokens[count], termAtt.term());
assertEquals(tokens[count], termAtt.toString());
count++;
}

View File

@ -17,14 +17,14 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
public class TestISOLatin1AccentFilter extends BaseTokenStreamTestCase {
public void testU() throws Exception {
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl"));
ISOLatin1AccentFilter filter = new ISOLatin1AccentFilter(stream);
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
assertTermEquals("Des", filter, termAtt);
assertTermEquals("mot", filter, termAtt);
assertTermEquals("cles", filter, termAtt);
@ -103,8 +103,8 @@ public class TestISOLatin1AccentFilter extends BaseTokenStreamTestCase {
assertFalse(filter.incrementToken());
}
void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt) throws Exception {
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt) throws Exception {
assertTrue(stream.incrementToken());
assertEquals(expected, termAtt.term());
assertEquals(expected, termAtt.toString());
}
}

View File

@ -6,7 +6,7 @@ import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Test;
/**
@ -53,20 +53,20 @@ public class TestKeywordMarkerTokenFilter extends BaseTokenStreamTestCase {
public static class LowerCaseFilterMock extends TokenFilter {
private TermAttribute termAtt;
private KeywordAttribute keywordAttr;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public LowerCaseFilterMock(TokenStream in) {
super(in);
termAtt = addAttribute(TermAttribute.class);
keywordAttr = addAttribute(KeywordAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword())
termAtt.setTermBuffer(termAtt.term().toLowerCase());
if (!keywordAttr.isKeyword()) {
final String term = termAtt.toString().toLowerCase();
termAtt.setEmpty().append(term);
}
return true;
}
return false;

View File

@ -17,7 +17,7 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.StringReader;
public class TestLengthFilter extends BaseTokenStreamTestCase {
@ -26,14 +26,14 @@ public class TestLengthFilter extends BaseTokenStreamTestCase {
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
LengthFilter filter = new LengthFilter(stream, 2, 6);
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
assertTrue(filter.incrementToken());
assertEquals("short", termAtt.term());
assertEquals("short", termAtt.toString());
assertTrue(filter.incrementToken());
assertEquals("ab", termAtt.term());
assertEquals("ab", termAtt.toString());
assertTrue(filter.incrementToken());
assertEquals("foo", termAtt.term());
assertEquals("foo", termAtt.toString());
assertFalse(filter.incrementToken());
}

View File

@ -2,7 +2,7 @@ package org.apache.lucene.analysis;
import java.io.StringReader;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -30,19 +30,19 @@ public class TestPerFieldAnalzyerWrapper extends BaseTokenStreamTestCase {
TokenStream tokenStream = analyzer.tokenStream("field",
new StringReader(text));
TermAttribute termAtt = tokenStream.getAttribute(TermAttribute.class);
CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
assertTrue(tokenStream.incrementToken());
assertEquals("WhitespaceAnalyzer does not lowercase",
"Qwerty",
termAtt.term());
termAtt.toString());
tokenStream = analyzer.tokenStream("special",
new StringReader(text));
termAtt = tokenStream.getAttribute(TermAttribute.class);
termAtt = tokenStream.getAttribute(CharTermAttribute.class);
assertTrue(tokenStream.incrementToken());
assertEquals("SimpleAnalyzer lowercases",
"qwerty",
termAtt.term());
termAtt.toString());
}
}

View File

@ -18,7 +18,7 @@ package org.apache.lucene.analysis;
*/
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
import java.io.StringReader;
@ -51,10 +51,10 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
StringReader reader = new StringReader("This is a test of the english stop analyzer");
TokenStream stream = stop.tokenStream("test", reader);
assertTrue(stream != null);
TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
while (stream.incrementToken()) {
assertFalse(inValidTokens.contains(termAtt.term()));
assertFalse(inValidTokens.contains(termAtt.toString()));
}
}
@ -67,11 +67,11 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
StringReader reader = new StringReader("This is a good test of the english stop analyzer");
TokenStream stream = newStop.tokenStream("test", reader);
assertNotNull(stream);
TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
while (stream.incrementToken()) {
String text = termAtt.term();
String text = termAtt.toString();
assertFalse(stopWordsSet.contains(text));
assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments.
}
@ -88,11 +88,11 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
TokenStream stream = newStop.tokenStream("test", reader);
assertNotNull(stream);
int i = 0;
TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
while (stream.incrementToken()) {
String text = termAtt.term();
String text = termAtt.toString();
assertFalse(stopWordsSet.contains(text));
assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement());
}

View File

@ -17,7 +17,7 @@ package org.apache.lucene.analysis;
*/
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.English;
import org.apache.lucene.util.Version;
@ -37,11 +37,11 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
StringReader reader = new StringReader("Now is The Time");
Set<String> stopWords = new HashSet<String>(Arrays.asList("is", "the", "Time"));
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, false);
final TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.term());
assertEquals("Now", termAtt.toString());
assertTrue(stream.incrementToken());
assertEquals("The", termAtt.term());
assertEquals("The", termAtt.toString());
assertFalse(stream.incrementToken());
}
@ -49,9 +49,9 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
StringReader reader = new StringReader("Now is The Time");
Set<Object> stopWords = new HashSet<Object>(Arrays.asList( "is", "the", "Time" ));
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, true);
final TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.term());
assertEquals("Now", termAtt.toString());
assertFalse(stream.incrementToken());
}
@ -60,11 +60,11 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
String[] stopWords = new String[] { "is", "the", "Time" };
Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
final TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
assertTrue(stream.incrementToken());
assertEquals("Now", termAtt.term());
assertEquals("Now", termAtt.toString());
assertTrue(stream.incrementToken());
assertEquals("The", termAtt.term());
assertEquals("The", termAtt.toString());
assertFalse(stream.incrementToken());
}
@ -117,13 +117,13 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws IOException {
log("---> test with enable-increments-"+(enableIcrements?"enabled":"disabled"));
stpf.setEnablePositionIncrements(enableIcrements);
TermAttribute termAtt = stpf.getAttribute(TermAttribute.class);
CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
for (int i=0; i<20; i+=3) {
assertTrue(stpf.incrementToken());
log("Token "+i+": "+stpf);
String w = English.intToEnglish(i).trim();
assertEquals("expecting token "+i+" to be "+w,w,termAtt.term());
assertEquals("expecting token "+i+" to be "+w,w,termAtt.toString());
assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
}
assertFalse(stpf.incrementToken());

View File

@ -19,7 +19,7 @@ package org.apache.lucene.analysis;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.English;
import java.io.IOException;
@ -59,16 +59,16 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
static final TeeSinkTokenFilter.SinkFilter theFilter = new TeeSinkTokenFilter.SinkFilter() {
@Override
public boolean accept(AttributeSource a) {
TermAttribute termAtt = a.getAttribute(TermAttribute.class);
return termAtt.term().equalsIgnoreCase("The");
CharTermAttribute termAtt = a.getAttribute(CharTermAttribute.class);
return termAtt.toString().equalsIgnoreCase("The");
}
};
static final TeeSinkTokenFilter.SinkFilter dogFilter = new TeeSinkTokenFilter.SinkFilter() {
@Override
public boolean accept(AttributeSource a) {
TermAttribute termAtt = a.getAttribute(TermAttribute.class);
return termAtt.term().equalsIgnoreCase("Dogs");
CharTermAttribute termAtt = a.getAttribute(CharTermAttribute.class);
return termAtt.toString().equalsIgnoreCase("Dogs");
}
};
@ -135,8 +135,8 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100));
teeStream.consumeAllTokens();
TokenStream stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))), 100);
TermAttribute tfTok = stream.addAttribute(TermAttribute.class);
TermAttribute sinkTok = sink.addAttribute(TermAttribute.class);
CharTermAttribute tfTok = stream.addAttribute(CharTermAttribute.class);
CharTermAttribute sinkTok = sink.addAttribute(CharTermAttribute.class);
for (int i=0; stream.incrementToken(); i++) {
assertTrue(sink.incrementToken());
assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.equals(sinkTok));
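
CharTermAttribute implements CharSequence, so sink filters like those above can avoid the toString() allocation when a case-sensitive check is enough (the patch itself keeps equalsIgnoreCase, which still needs toString()). A sketch under that assumption, with a hypothetical helper class:

    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    final class TermChecks {
      // Case-sensitive test via length()/charAt(); no String is allocated.
      static boolean isThe(CharTermAttribute termAtt) {
        return termAtt.length() == 3
            && termAtt.charAt(0) == 'T'
            && termAtt.charAt(1) == 'h'
            && termAtt.charAt(2) == 'e';
      }
    }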

View File

@@ -244,8 +244,8 @@ public class TestToken extends LuceneTestCase {
assertTrue("TypeAttribute is not implemented by SenselessAttributeImpl",
ts.addAttribute(SenselessAttribute.class) instanceof SenselessAttributeImpl);
assertTrue("TermAttribute is not implemented by Token",
ts.addAttribute(TermAttribute.class) instanceof Token);
assertTrue("CharTermAttribute is not implemented by Token",
ts.addAttribute(CharTermAttribute.class) instanceof Token);
assertTrue("OffsetAttribute is not implemented by Token",
ts.addAttribute(OffsetAttribute.class) instanceof Token);
assertTrue("FlagsAttribute is not implemented by Token",

View File

@@ -29,7 +29,7 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
@@ -152,15 +152,15 @@ public class TestDocumentWriter extends LuceneTestCase {
restoreState(state);
payloadAtt.setPayload(null);
posIncrAtt.setPositionIncrement(0);
termAtt.setTermBuffer(new char[]{'b'}, 0, 1);
termAtt.setEmpty().append("b");
state = null;
return true;
}
boolean hasNext = input.incrementToken();
if (!hasNext) return false;
if (Character.isDigit(termAtt.termBuffer()[0])) {
posIncrAtt.setPositionIncrement(termAtt.termBuffer()[0] - '0');
if (Character.isDigit(termAtt.buffer()[0])) {
posIncrAtt.setPositionIncrement(termAtt.buffer()[0] - '0');
}
if (first) {
// set payload on first position only
@@ -174,7 +174,7 @@ public class TestDocumentWriter extends LuceneTestCase {
}
TermAttribute termAtt = addAttribute(TermAttribute.class);
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
};
@@ -215,7 +215,7 @@ public class TestDocumentWriter extends LuceneTestCase {
private String[] tokens = new String[] {"term1", "term2", "term3", "term2"};
private int index = 0;
private TermAttribute termAtt = addAttribute(TermAttribute.class);
private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@Override
public boolean incrementToken() throws IOException {
@@ -223,7 +223,7 @@ public class TestDocumentWriter extends LuceneTestCase {
return false;
} else {
clearAttributes();
termAtt.setTermBuffer(tokens[index++]);
termAtt.setEmpty().append(tokens[index++]);
return true;
}
}
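
buffer() and length() replace termBuffer() and termLength() above, and the returned array may also be written in place. A lowercase-style sketch of that idiom (class name hypothetical, not from this patch; supplementary characters are ignored for brevity):

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    final class ToLowerFilter extends TokenFilter {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

      ToLowerFilter(TokenStream in) { super(in); }

      @Override
      public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) return false;
        final char[] buf = termAtt.buffer();
        final int len = termAtt.length();
        for (int i = 0; i < len; i++) {
          buf[i] = Character.toLowerCase(buf[i]); // mutate the term in place
        }
        return true;
      }
    }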

View File

@@ -46,7 +46,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -3482,7 +3482,7 @@ public class TestIndexWriter extends LuceneTestCase {
// LUCENE-1255
public void testNegativePositions() throws Throwable {
final TokenStream tokens = new TokenStream() {
final TermAttribute termAtt = addAttribute(TermAttribute.class);
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
final Iterator<String> terms = Arrays.asList("a","b","c").iterator();
@@ -3492,7 +3492,7 @@ public class TestIndexWriter extends LuceneTestCase {
public boolean incrementToken() {
if (!terms.hasNext()) return false;
clearAttributes();
termAtt.setTermBuffer( terms.next());
termAtt.append(terms.next());
posIncrAtt.setPositionIncrement(first ? 0 : 1);
first = false;
return true;
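
The producer idiom above, clearAttributes() followed by a bare append(String), is safe because clearAttributes() resets the term to length zero. A self-contained sketch of the same shape (class name hypothetical):

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    final class WordListTokenStream extends TokenStream {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      private final String[] words;
      private int upto = 0;

      WordListTokenStream(String... words) { this.words = words; }

      @Override
      public boolean incrementToken() throws IOException {
        if (upto == words.length) return false;
        clearAttributes();              // resets the term to length 0
        termAtt.append(words[upto++]);  // replaces the old setTermBuffer(String)
        return true;
      }
    }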

View File

@@ -33,7 +33,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -538,7 +538,7 @@ public class TestPayloads extends LuceneTestCase {
private ByteArrayPool pool;
private String term;
TermAttribute termAtt;
CharTermAttribute termAtt;
PayloadAttribute payloadAtt;
PoolingPayloadTokenStream(ByteArrayPool pool) {
@@ -548,7 +548,7 @@
term = pool.bytesToString(payload);
first = true;
payloadAtt = addAttribute(PayloadAttribute.class);
termAtt = addAttribute(TermAttribute.class);
termAtt = addAttribute(CharTermAttribute.class);
}
@Override
@@ -556,7 +556,7 @@
if (!first) return false;
first = false;
clearAttributes();
termAtt.setTermBuffer(term);
termAtt.append(term);
payloadAtt.setPayload(new Payload(payload));
return true;
}

View File

@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.MockRAMDirectory;
@@ -123,12 +123,12 @@ public class TestTermVectorsReader extends LuceneTestCase {
private class MyTokenStream extends TokenStream {
int tokenUpto;
TermAttribute termAtt;
CharTermAttribute termAtt;
PositionIncrementAttribute posIncrAtt;
OffsetAttribute offsetAtt;
public MyTokenStream() {
termAtt = addAttribute(TermAttribute.class);
termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
}
@@ -140,7 +140,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
else {
final TestToken testToken = tokens[tokenUpto++];
clearAttributes();
termAtt.setTermBuffer(testToken.text);
termAtt.append(testToken.text);
offsetAtt.setOffset(testToken.startOffset, testToken.endOffset);
if (tokenUpto > 1) {
posIncrAtt.setPositionIncrement(testToken.pos - tokens[tokenUpto-2].pos);

View File

@@ -23,7 +23,7 @@ import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -33,12 +33,12 @@ import org.apache.lucene.util.LuceneTestCase;
class RepeatingTokenStream extends TokenStream {
public int num;
TermAttribute termAtt;
CharTermAttribute termAtt;
String value;
public RepeatingTokenStream(String val) {
this.value = val;
this.termAtt = addAttribute(TermAttribute.class);
this.termAtt = addAttribute(CharTermAttribute.class);
}
@Override
@@ -46,7 +46,7 @@ class RepeatingTokenStream extends TokenStream {
num--;
if (num >= 0) {
clearAttributes();
termAtt.setTermBuffer(value);
termAtt.append(value);
return true;
}
return false;

View File

@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.search.Query;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -148,14 +148,14 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
private int prevStartOffset;
private int prevEndOffset;
TermAttribute termAtt;
CharTermAttribute termAtt;
PositionIncrementAttribute posIncrAtt;
OffsetAttribute offsetAtt;
TypeAttribute typeAtt;
public TestFilter(TokenStream in) {
super(in);
termAtt = addAttribute(TermAttribute.class);
termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
@@ -164,7 +164,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
@Override
public final boolean incrementToken() throws java.io.IOException {
if (multiToken > 0) {
termAtt.setTermBuffer("multi"+(multiToken+1));
termAtt.setEmpty().append("multi"+(multiToken+1));
offsetAtt.setOffset(prevStartOffset, prevEndOffset);
typeAtt.setType(prevType);
posIncrAtt.setPositionIncrement(0);
@@ -178,7 +178,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
prevType = typeAtt.type();
prevStartOffset = offsetAtt.startOffset();
prevEndOffset = offsetAtt.endOffset();
String text = termAtt.term();
String text = termAtt.toString();
if (text.equals("triplemulti")) {
multiToken = 2;
return true;
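
For filters that inject stacked tokens, as TestFilter does here, setEmpty().append(...) rewrites the term of the injected position and a zero position increment stacks it on its predecessor. A reduced, hypothetical sketch of that shape (not the patch's own code):

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

    final class EchoMultiFilter extends TokenFilter {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
      private boolean pendingEcho = false;

      EchoMultiFilter(TokenStream in) { super(in); }

      @Override
      public boolean incrementToken() throws IOException {
        if (pendingEcho) {
          pendingEcho = false;
          termAtt.setEmpty().append("multi2"); // replace, never concatenate
          posIncrAtt.setPositionIncrement(0);  // stack on the previous position
          return true;
        }
        if (!input.incrementToken()) return false;
        pendingEcho = termAtt.toString().equals("multi");
        return true;
      }
    }
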
@@ -212,21 +212,21 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
private final class TestPosIncrementFilter extends TokenFilter {
TermAttribute termAtt;
CharTermAttribute termAtt;
PositionIncrementAttribute posIncrAtt;
public TestPosIncrementFilter(TokenStream in) {
super(in);
termAtt = addAttribute(TermAttribute.class);
termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
}
@Override
public final boolean incrementToken () throws java.io.IOException {
while(input.incrementToken()) {
if (termAtt.term().equals("the")) {
if (termAtt.toString().equals("the")) {
// stopword, do nothing
} else if (termAtt.term().equals("quick")) {
} else if (termAtt.toString().equals("quick")) {
posIncrAtt.setPositionIncrement(2);
return true;
} else {

View File

@@ -40,7 +40,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
@@ -82,7 +82,7 @@ public class TestQueryParser extends LocalizedTestCase {
public static Analyzer qpAnalyzer = new QPTestAnalyzer();
public static class QPTestFilter extends TokenFilter {
TermAttribute termAtt;
CharTermAttribute termAtt;
OffsetAttribute offsetAtt;
/**
@@ -91,7 +91,7 @@
*/
public QPTestFilter(TokenStream in) {
super(in);
termAtt = addAttribute(TermAttribute.class);
termAtt = addAttribute(CharTermAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
}
@@ -103,19 +103,19 @@ public class TestQueryParser extends LocalizedTestCase {
if (inPhrase) {
inPhrase = false;
clearAttributes();
termAtt.setTermBuffer("phrase2");
termAtt.append("phrase2");
offsetAtt.setOffset(savedStart, savedEnd);
return true;
} else
while (input.incrementToken()) {
if (termAtt.term().equals("phrase")) {
if (termAtt.toString().equals("phrase")) {
inPhrase = true;
savedStart = offsetAtt.startOffset();
savedEnd = offsetAtt.endOffset();
termAtt.setTermBuffer("phrase1");
termAtt.setEmpty().append("phrase1");
offsetAtt.setOffset(savedStart, savedEnd);
return true;
} else if (!termAtt.term().equals("stop"))
} else if (!termAtt.toString().equals("stop"))
return true;
}
return false;
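
Note the two idioms in this hunk: right after clearAttributes() a bare append("phrase2") suffices, but mid-stream the term must first be emptied, hence setEmpty().append("phrase1"); a bare append() there would concatenate onto the previous term. A hypothetical filter making that pitfall concrete:

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    final class ReplaceTermFilter extends TokenFilter {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

      ReplaceTermFilter(TokenStream in) { super(in); }

      @Override
      public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) return false;
        if (termAtt.toString().equals("colour")) {
          // setEmpty() is required: append() alone would yield "colourcolor".
          termAtt.setEmpty().append("color");
        }
        return true;
      }
    }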

View File

@@ -31,7 +31,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -74,7 +74,7 @@ public class TestPositionIncrement extends LuceneTestCase {
private int i = 0;
PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
TermAttribute termAtt = addAttribute(TermAttribute.class);
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@Override
@@ -82,7 +82,7 @@
if (i == TOKENS.length)
return false;
clearAttributes();
termAtt.setTermBuffer(TOKENS[i]);
termAtt.append(TOKENS[i]);
offsetAtt.setOffset(i,i);
posIncrAtt.setPositionIncrement(INCREMENTS[i]);
i++;
@@ -347,7 +347,7 @@ class PayloadFilter extends TokenFilter {
final PositionIncrementAttribute posIncrAttr;
final PayloadAttribute payloadAttr;
final TermAttribute termAttr;
final CharTermAttribute termAttr;
public PayloadFilter(TokenStream input, String fieldName) {
super(input);
@@ -356,7 +356,7 @@ class PayloadFilter extends TokenFilter {
i = 0;
posIncrAttr = input.addAttribute(PositionIncrementAttribute.class);
payloadAttr = input.addAttribute(PayloadAttribute.class);
termAttr = input.addAttribute(TermAttribute.class);
termAttr = input.addAttribute(CharTermAttribute.class);
}
@Override
@@ -372,7 +372,7 @@ class PayloadFilter extends TokenFilter {
posIncrAttr.setPositionIncrement(posIncr);
pos += posIncr;
if (TestPositionIncrement.VERBOSE) {
System.out.println("term=" + termAttr.term() + " pos=" + pos);
System.out.println("term=" + termAttr + " pos=" + pos);
}
i++;
return true;
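
Because toString() returns exactly the current term text, the attribute can be dropped straight into string concatenation, as the println above does. A small hypothetical helper to the same effect:

    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    final class TokenDebug {
      // toString() is invoked implicitly by the concatenation.
      static String describe(CharTermAttribute termAtt, int pos) {
        return "term=" + termAtt + " pos=" + pos + " len=" + termAtt.length();
      }
    }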

View File

@@ -27,7 +27,7 @@ import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
@@ -280,27 +280,25 @@ public class TestTermRangeQuery extends LuceneTestCase {
private static class SingleCharTokenizer extends Tokenizer {
char[] buffer = new char[1];
boolean done;
TermAttribute termAtt;
boolean done = false;
CharTermAttribute termAtt;
public SingleCharTokenizer(Reader r) {
super(r);
termAtt = addAttribute(TermAttribute.class);
termAtt = addAttribute(CharTermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
int count = input.read(buffer);
if (done)
return false;
else {
int count = input.read(buffer);
clearAttributes();
done = true;
if (count == 1) {
termAtt.termBuffer()[0] = buffer[0];
termAtt.setTermLength(1);
} else
termAtt.setTermLength(0);
termAtt.copyBuffer(buffer, 0, 1);
}
return true;
}
}
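
copyBuffer(buffer, offset, length) collapses the old termBuffer()[0] = ... plus setTermLength(...) sequence into a single call and resizes the term buffer as needed. A reduced sketch of the tokenizer above (hypothetical class that emits one fixed token from a char[] instead of reading input):

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    final class SingleBufferTokenStream extends TokenStream {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      private final char[] chars;
      private boolean done = false;

      SingleBufferTokenStream(char[] chars) { this.chars = chars; }

      @Override
      public boolean incrementToken() throws IOException {
        if (done) return false;
        done = true;
        clearAttributes();
        termAtt.copyBuffer(chars, 0, chars.length); // grows the buffer if needed
        return true;
      }
    }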

View File

@@ -29,7 +29,7 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
@@ -479,7 +479,7 @@ public class TestPayloadSpans extends LuceneTestCase {
Set<String> nopayload = new HashSet<String>();
int pos;
PayloadAttribute payloadAtt;
TermAttribute termAtt;
CharTermAttribute termAtt;
PositionIncrementAttribute posIncrAtt;
public PayloadFilter(TokenStream input, String fieldName) {
@@ -490,7 +490,7 @@ public class TestPayloadSpans extends LuceneTestCase {
entities.add("one");
nopayload.add("nopayload");
nopayload.add("np");
termAtt = addAttribute(TermAttribute.class);
termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
payloadAtt = addAttribute(PayloadAttribute.class);
}
@@ -498,7 +498,7 @@ public class TestPayloadSpans extends LuceneTestCase {
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
String token = new String(termAtt.termBuffer(), 0, termAtt.termLength());
String token = termAtt.toString();
if (!nopayload.contains(token)) {
if (entities.contains(token)) {