From d02cbfe3c16fdd11bf7acafdd7034055cc2cf686 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Sat, 10 Apr 2010 15:41:27 +0000 Subject: [PATCH] LUCENE-2372: Convert core analyzers to CharTermAttribute. Also made rest of core analyzers final. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@932749 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 + .../apache/lucene/analysis/TestAnalyzers.java | 2 + .../lucene/analysis/ASCIIFoldingFilter.java | 11 ++- .../org/apache/lucene/analysis/Analyzer.java | 22 ----- .../apache/lucene/analysis/CharTokenizer.java | 29 +++--- .../analysis/ISOLatin1AccentFilter.java | 13 ++- .../lucene/analysis/KeywordAnalyzer.java | 26 +----- .../analysis/KeywordMarkerTokenFilter.java | 12 ++- .../lucene/analysis/KeywordTokenizer.java | 27 +++--- .../apache/lucene/analysis/LengthFilter.java | 11 ++- .../lucene/analysis/LowerCaseFilter.java | 12 ++- .../lucene/analysis/NumericTokenStream.java | 8 +- .../analysis/PerFieldAnalyzerWrapper.java | 8 +- .../lucene/analysis/PorterStemFilter.java | 15 ++-- .../lucene/analysis/ReusableAnalyzerBase.java | 4 +- .../apache/lucene/analysis/StopFilter.java | 10 +-- .../analysis/standard/StandardAnalyzer.java | 88 ++++++------------- .../analysis/standard/StandardFilter.java | 19 ++-- .../lucene/collation/CollationKeyFilter.java | 19 ++-- .../lucene/queryParser/QueryParser.java | 16 ++-- .../apache/lucene/queryParser/QueryParser.jj | 16 ++-- .../queryParser/QueryParserTokenManager.java | 2 +- .../apache/lucene/search/QueryTermVector.java | 6 +- .../analysis/BaseTokenStreamTestCase.java | 8 +- .../analysis/TestASCIIFoldingFilter.java | 10 +-- .../apache/lucene/analysis/TestAnalyzers.java | 26 +----- .../analysis/TestCachingTokenFilter.java | 11 ++- .../analysis/TestISOLatin1AccentFilter.java | 8 +- .../TestKeywordMarkerTokenFilter.java | 14 +-- .../lucene/analysis/TestLengthFilter.java | 10 +-- .../analysis/TestPerFieldAnalzyerWrapper.java | 10 +-- 
.../lucene/analysis/TestStopAnalyzer.java | 14 +-- .../lucene/analysis/TestStopFilter.java | 22 ++--- .../analysis/TestTeeSinkTokenFilter.java | 14 +-- .../org/apache/lucene/analysis/TestToken.java | 4 +- .../lucene/index/TestDocumentWriter.java | 14 +-- .../apache/lucene/index/TestIndexWriter.java | 6 +- .../org/apache/lucene/index/TestPayloads.java | 8 +- .../lucene/index/TestTermVectorsReader.java | 8 +- .../apache/lucene/index/TestTermdocPerf.java | 8 +- .../lucene/queryParser/TestMultiAnalyzer.java | 18 ++-- .../lucene/queryParser/TestQueryParser.java | 14 +-- .../lucene/search/TestPositionIncrement.java | 12 +-- .../lucene/search/TestTermRangeQuery.java | 16 ++-- .../lucene/search/spans/TestPayloadSpans.java | 8 +- 45 files changed, 258 insertions(+), 385 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e9ca6912fbe..1d44d66d2d6 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -97,6 +97,10 @@ Changes in backwards compatibility policy TODO: Point to new attribute inspection API coming with LUCENE-2374. (Uwe Schindler, Robert Muir) +* LUCENE-2372: StandardAnalyzer, KeywordAnalyzer, PerFieldAnalyzerWrapper + are now final. Also removed the now obsolete and deprecated + Analyzer.setOverridesTokenStreamMethod(). 
(Uwe Schindler) + Changes in runtime behavior * LUCENE-1923: Made IndexReader.toString() produce something diff --git a/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java b/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java index c7805f62142..260df587577 100644 --- a/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java +++ b/lucene/backwards/src/test/org/apache/lucene/analysis/TestAnalyzers.java @@ -120,6 +120,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { String[] y = StandardTokenizer.TOKEN_TYPES; } + /* StandardAnalyzer was made final in 3.1: private static class MyStandardAnalyzer extends StandardAnalyzer { public MyStandardAnalyzer() { super(org.apache.lucene.util.Version.LUCENE_CURRENT); @@ -139,6 +140,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { assertTrue(ts.incrementToken()); assertFalse(ts.incrementToken()); } + */ } class PayloadSetter extends TokenFilter { diff --git a/lucene/src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java b/lucene/src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java index c94c1246584..296874267ba 100644 --- a/lucene/src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java +++ b/lucene/src/java/org/apache/lucene/analysis/ASCIIFoldingFilter.java @@ -19,7 +19,7 @@ package org.apache.lucene.analysis; import java.io.IOException; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; @@ -61,18 +61,17 @@ public final class ASCIIFoldingFilter extends TokenFilter { public ASCIIFoldingFilter(TokenStream input) { super(input); - termAtt = addAttribute(TermAttribute.class); } private char[] output = new char[512]; private int outputPos; - private TermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); 
@Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - final char[] buffer = termAtt.termBuffer(); - final int length = termAtt.termLength(); + final char[] buffer = termAtt.buffer(); + final int length = termAtt.length(); // If no characters actually require rewriting then we // just return token as-is: @@ -81,7 +80,7 @@ public final class ASCIIFoldingFilter extends TokenFilter { if (c >= '\u0080') { foldToASCII(buffer, length); - termAtt.setTermBuffer(output, 0, outputPos); + termAtt.copyBuffer(output, 0, outputPos); break; } } diff --git a/lucene/src/java/org/apache/lucene/analysis/Analyzer.java b/lucene/src/java/org/apache/lucene/analysis/Analyzer.java index 66817c75470..cae6b5ca088 100644 --- a/lucene/src/java/org/apache/lucene/analysis/Analyzer.java +++ b/lucene/src/java/org/apache/lucene/analysis/Analyzer.java @@ -84,28 +84,6 @@ public abstract class Analyzer implements Closeable { } } - private static final VirtualMethod tokenStreamMethod = - new VirtualMethod(Analyzer.class, "tokenStream", String.class, Reader.class); - private static final VirtualMethod reusableTokenStreamMethod = - new VirtualMethod(Analyzer.class, "reusableTokenStream", String.class, Reader.class); - - /** This field contains if the {@link #tokenStream} method was overridden in a - * more far away subclass of {@code Analyzer} on the current instance's inheritance path. - * If this field is {@code true}, {@link #reusableTokenStream} should delegate to {@link #tokenStream} - * instead of using the own implementation. - * @deprecated Please declare all implementations of {@link #reusableTokenStream} and {@link #tokenStream} - * as {@code final}. - */ - @Deprecated - protected final boolean overridesTokenStreamMethod = - VirtualMethod.compareImplementationDistance(this.getClass(), tokenStreamMethod, reusableTokenStreamMethod) > 0; - - /** @deprecated This is a no-op since Lucene 3.1. 
*/ - @Deprecated - protected void setOverridesTokenStreamMethod(Class baseClass) { - } - - /** * Invoked before indexing a Fieldable instance if * terms have already been added to that field. This allows custom diff --git a/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java b/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java index 8865a1e3780..26dae22f9bd 100644 --- a/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java +++ b/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java @@ -21,7 +21,7 @@ import java.io.IOException; import java.io.Reader; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.CharacterUtils; import org.apache.lucene.util.Version; @@ -78,10 +78,7 @@ public abstract class CharTokenizer extends Tokenizer { public CharTokenizer(Version matchVersion, Reader input) { super(input); charUtils = CharacterUtils.getInstance(matchVersion); - offsetAtt = addAttribute(OffsetAttribute.class); - termAtt = addAttribute(TermAttribute.class); useOldAPI = useOldAPI(matchVersion); - ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE); } @@ -99,10 +96,7 @@ public abstract class CharTokenizer extends Tokenizer { Reader input) { super(source, input); charUtils = CharacterUtils.getInstance(matchVersion); - offsetAtt = addAttribute(OffsetAttribute.class); - termAtt = addAttribute(TermAttribute.class); useOldAPI = useOldAPI(matchVersion); - ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE); } /** @@ -119,10 +113,7 @@ public abstract class CharTokenizer extends Tokenizer { Reader input) { super(factory, input); charUtils = CharacterUtils.getInstance(matchVersion); - offsetAtt = addAttribute(OffsetAttribute.class); - termAtt = addAttribute(TermAttribute.class); useOldAPI = 
useOldAPI(matchVersion); - ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE); } /** @@ -164,11 +155,11 @@ public abstract class CharTokenizer extends Tokenizer { private static final int MAX_WORD_LEN = 255; private static final int IO_BUFFER_SIZE = 4096; - private final TermAttribute termAtt; - private final OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);; + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); private final CharacterUtils charUtils; - private final CharacterBuffer ioBuffer; + private final CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE); /** * @deprecated this will be removed in lucene 4.0 @@ -275,7 +266,7 @@ public abstract class CharTokenizer extends Tokenizer { return incrementTokenOld(); int length = 0; int start = bufferIndex; - char[] buffer = termAtt.termBuffer(); + char[] buffer = termAtt.buffer(); while (true) { if (bufferIndex >= dataLen) { offset += dataLen; @@ -297,7 +288,7 @@ public abstract class CharTokenizer extends Tokenizer { if (length == 0) // start of token start = offset + bufferIndex - 1; else if (length >= buffer.length-1) // check if a supplementary could run out of bounds - buffer = termAtt.resizeTermBuffer(2+length); // make sure a supplementary fits in the buffer + buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized if (length >= MAX_WORD_LEN) // buffer overflow! 
make sure to check for >= surrogate pair could break == test break; @@ -305,7 +296,7 @@ public abstract class CharTokenizer extends Tokenizer { break; // return 'em } - termAtt.setTermLength(length); + termAtt.setLength(length); offsetAtt.setOffset(correctOffset(start), correctOffset(start+length)); return true; @@ -320,7 +311,7 @@ public abstract class CharTokenizer extends Tokenizer { private boolean incrementTokenOld() throws IOException { int length = 0; int start = bufferIndex; - char[] buffer = termAtt.termBuffer(); + char[] buffer = termAtt.buffer(); final char[] oldIoBuffer = ioBuffer.getBuffer(); while (true) { @@ -344,7 +335,7 @@ public abstract class CharTokenizer extends Tokenizer { if (length == 0) // start of token start = offset + bufferIndex - 1; else if (length == buffer.length) - buffer = termAtt.resizeTermBuffer(1+length); + buffer = termAtt.resizeBuffer(1+length); buffer[length++] = normalize(c); // buffer it, normalized @@ -355,7 +346,7 @@ public abstract class CharTokenizer extends Tokenizer { break; // return 'em } - termAtt.setTermLength(length); + termAtt.setLength(length); offsetAtt.setOffset(correctOffset(start), correctOffset(start+length)); return true; } diff --git a/lucene/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java b/lucene/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java index f9a5c06aa8f..03378ab8588 100644 --- a/lucene/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java +++ b/lucene/src/java/org/apache/lucene/analysis/ISOLatin1AccentFilter.java @@ -1,7 +1,5 @@ package org.apache.lucene.analysis; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -19,6 +17,8 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; * limitations under the License. 
*/ +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + /** * A filter that replaces accented characters in the ISO Latin 1 character set * (ISO-8859-1) by their unaccented equivalent. The case will not be altered. @@ -35,25 +35,24 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; public final class ISOLatin1AccentFilter extends TokenFilter { public ISOLatin1AccentFilter(TokenStream input) { super(input); - termAtt = addAttribute(TermAttribute.class); } private char[] output = new char[256]; private int outputPos; - private TermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); @Override public final boolean incrementToken() throws java.io.IOException { if (input.incrementToken()) { - final char[] buffer = termAtt.termBuffer(); - final int length = termAtt.termLength(); + final char[] buffer = termAtt.buffer(); + final int length = termAtt.length(); // If no characters actually require rewriting then we // just return token as-is: for(int i=0;i<length;i++) { final char c = buffer[i]; if (c >= '\u00c0' && c <= '\uFB06') { removeAccents(buffer, length); - termAtt.setTermBuffer(output, 0, outputPos); + termAtt.copyBuffer(output, 0, outputPos); break; } } diff --git a/lucene/src/java/org/apache/lucene/analysis/KeywordAnalyzer.java b/lucene/src/java/org/apache/lucene/analysis/KeywordAnalyzer.java index 4f858782ca3..74d0f4c118a 100644 --- a/lucene/src/java/org/apache/lucene/analysis/KeywordAnalyzer.java +++ b/lucene/src/java/org/apache/lucene/analysis/KeywordAnalyzer.java @@ -17,36 +17,18 @@ package org.apache.lucene.analysis; * limitations under the License. */ -import java.io.IOException; import java.io.Reader; /** * "Tokenizes" the entire stream as a single token. This is useful * for data like zip codes, ids, and some product names. 
*/ -public class KeywordAnalyzer extends Analyzer { +public final class KeywordAnalyzer extends ReusableAnalyzerBase { public KeywordAnalyzer() { } + @Override - public TokenStream tokenStream(String fieldName, - final Reader reader) { - return new KeywordTokenizer(reader); - } - @Override - public TokenStream reusableTokenStream(String fieldName, - final Reader reader) throws IOException { - if (overridesTokenStreamMethod) { - // LUCENE-1678: force fallback to tokenStream() if we - // have been subclassed and that subclass overrides - // tokenStream but not reusableTokenStream - return tokenStream(fieldName, reader); - } - Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream(); - if (tokenizer == null) { - tokenizer = new KeywordTokenizer(reader); - setPreviousTokenStream(tokenizer); - } else - tokenizer.reset(reader); - return tokenizer; + protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { + return new TokenStreamComponents(new KeywordTokenizer(reader)); } } diff --git a/lucene/src/java/org/apache/lucene/analysis/KeywordMarkerTokenFilter.java b/lucene/src/java/org/apache/lucene/analysis/KeywordMarkerTokenFilter.java index f7e76daa5ac..1eaaa104f02 100644 --- a/lucene/src/java/org/apache/lucene/analysis/KeywordMarkerTokenFilter.java +++ b/lucene/src/java/org/apache/lucene/analysis/KeywordMarkerTokenFilter.java @@ -21,7 +21,7 @@ import java.io.IOException; import java.util.Set; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.Version; /** @@ -33,8 +33,8 @@ import org.apache.lucene.util.Version; */ public final class KeywordMarkerTokenFilter extends TokenFilter { - private final KeywordAttribute keywordAttr; - private final TermAttribute termAtt; + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); + 
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final CharArraySet keywordSet; /** @@ -50,8 +50,6 @@ public final class KeywordMarkerTokenFilter extends TokenFilter { public KeywordMarkerTokenFilter(final TokenStream in, final CharArraySet keywordSet) { super(in); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); this.keywordSet = keywordSet; } @@ -73,8 +71,8 @@ public final class KeywordMarkerTokenFilter extends TokenFilter { @Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { - keywordAttr.setKeyword(keywordSet.contains(termAtt.termBuffer(), 0, - termAtt.termLength())); + keywordAttr.setKeyword(keywordSet.contains(termAtt.buffer(), 0, + termAtt.length())); return true; } else return false; diff --git a/lucene/src/java/org/apache/lucene/analysis/KeywordTokenizer.java b/lucene/src/java/org/apache/lucene/analysis/KeywordTokenizer.java index 2990040e3c8..8b818be8ad0 100644 --- a/lucene/src/java/org/apache/lucene/analysis/KeywordTokenizer.java +++ b/lucene/src/java/org/apache/lucene/analysis/KeywordTokenizer.java @@ -21,7 +21,7 @@ import java.io.IOException; import java.io.Reader; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.AttributeSource; /** @@ -31,10 +31,10 @@ public final class KeywordTokenizer extends Tokenizer { private static final int DEFAULT_BUFFER_SIZE = 256; - private boolean done; + private boolean done = false; private int finalOffset; - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); public KeywordTokenizer(Reader input) { this(input, DEFAULT_BUFFER_SIZE); 
@@ -42,24 +42,17 @@ public final class KeywordTokenizer extends Tokenizer { public KeywordTokenizer(Reader input, int bufferSize) { super(input); - init(bufferSize); + termAtt.resizeBuffer(bufferSize); } public KeywordTokenizer(AttributeSource source, Reader input, int bufferSize) { super(source, input); - init(bufferSize); + termAtt.resizeBuffer(bufferSize); } public KeywordTokenizer(AttributeFactory factory, Reader input, int bufferSize) { super(factory, input); - init(bufferSize); - } - - private void init(int bufferSize) { - this.done = false; - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - termAtt.resizeTermBuffer(bufferSize); + termAtt.resizeBuffer(bufferSize); } @Override @@ -68,15 +61,15 @@ public final class KeywordTokenizer extends Tokenizer { clearAttributes(); done = true; int upto = 0; - char[] buffer = termAtt.termBuffer(); + char[] buffer = termAtt.buffer(); while (true) { final int length = input.read(buffer, upto, buffer.length-upto); if (length == -1) break; upto += length; if (upto == buffer.length) - buffer = termAtt.resizeTermBuffer(1+buffer.length); + buffer = termAtt.resizeBuffer(1+buffer.length); } - termAtt.setTermLength(upto); + termAtt.setLength(upto); finalOffset = correctOffset(upto); offsetAtt.setOffset(correctOffset(0), finalOffset); return true; diff --git a/lucene/src/java/org/apache/lucene/analysis/LengthFilter.java b/lucene/src/java/org/apache/lucene/analysis/LengthFilter.java index 3010a212965..551d2d0ae46 100644 --- a/lucene/src/java/org/apache/lucene/analysis/LengthFilter.java +++ b/lucene/src/java/org/apache/lucene/analysis/LengthFilter.java @@ -19,17 +19,17 @@ package org.apache.lucene.analysis; import java.io.IOException; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * Removes words that are too long or too short from the stream. 
*/ public final class LengthFilter extends TokenFilter { - final int min; - final int max; + private final int min; + private final int max; - private TermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** * Build a filter that removes words that are too long or too @@ -40,7 +40,6 @@ public final class LengthFilter extends TokenFilter { super(in); this.min = min; this.max = max; - termAtt = addAttribute(TermAttribute.class); } /** @@ -50,7 +49,7 @@ public final class LengthFilter extends TokenFilter { public final boolean incrementToken() throws IOException { // return the first non-stop word found while (input.incrementToken()) { - int len = termAtt.termLength(); + int len = termAtt.length(); if (len >= min && len <= max) { return true; } diff --git a/lucene/src/java/org/apache/lucene/analysis/LowerCaseFilter.java b/lucene/src/java/org/apache/lucene/analysis/LowerCaseFilter.java index 41093514778..7a4d7693890 100644 --- a/lucene/src/java/org/apache/lucene/analysis/LowerCaseFilter.java +++ b/lucene/src/java/org/apache/lucene/analysis/LowerCaseFilter.java @@ -19,7 +19,7 @@ package org.apache.lucene.analysis; import java.io.IOException; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.CharacterUtils; import org.apache.lucene.util.Version; @@ -34,7 +34,8 @@ import org.apache.lucene.util.Version; */ public final class LowerCaseFilter extends TokenFilter { private final CharacterUtils charUtils; - + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + /** * Create a new LowerCaseFilter, that normalizes token text to lower case. 
* @@ -43,7 +44,6 @@ public final class LowerCaseFilter extends TokenFilter { */ public LowerCaseFilter(Version matchVersion, TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); charUtils = CharacterUtils.getInstance(matchVersion); } @@ -55,13 +55,11 @@ public final class LowerCaseFilter extends TokenFilter { this(Version.LUCENE_30, in); } - private TermAttribute termAtt; - @Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { - final char[] buffer = termAtt.termBuffer(); - final int length = termAtt.termLength(); + final char[] buffer = termAtt.buffer(); + final int length = termAtt.length(); for (int i = 0; i < length;) { i += Character.toChars( Character.toLowerCase( diff --git a/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java b/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java index b7e1c679268..9e81f26d048 100644 --- a/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java +++ b/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java @@ -26,7 +26,6 @@ import org.apache.lucene.document.NumericField; // for javadocs import org.apache.lucene.search.NumericRangeQuery; // for javadocs import org.apache.lucene.search.NumericRangeFilter; // for javadocs import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; @@ -118,11 +117,14 @@ public final class NumericTokenStream extends TokenStream { this.delegate = delegate; } - @Override + @Override @SuppressWarnings("deprecation") public AttributeImpl createAttributeInstance(Class attClass) { if (attClass == NumericTermAttribute.class) return new NumericTermAttributeImpl(ts); - if 
(attClass.isAssignableFrom(CharTermAttribute.class) || attClass.isAssignableFrom(TermAttribute.class)) + if (attClass.isAssignableFrom(CharTermAttribute.class) || + // TODO: remove in 4.0 (deprecated class, also remove the suppress above): + attClass.isAssignableFrom(org.apache.lucene.analysis.tokenattributes.TermAttribute.class) + ) throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute/TermAttribute."); return delegate.createAttributeInstance(attClass); } diff --git a/lucene/src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java b/lucene/src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java index 2eeadc2253d..a640c33e30a 100644 --- a/lucene/src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java +++ b/lucene/src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java @@ -44,7 +44,7 @@ import java.util.HashMap; *

A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing * and query parsing. */ -public class PerFieldAnalyzerWrapper extends Analyzer { +public final class PerFieldAnalyzerWrapper extends Analyzer { private Analyzer defaultAnalyzer; private Map analyzerMap = new HashMap(); @@ -99,12 +99,6 @@ public class PerFieldAnalyzerWrapper extends Analyzer { @Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - if (overridesTokenStreamMethod) { - // LUCENE-1678: force fallback to tokenStream() if we - // have been subclassed and that subclass overrides - // tokenStream but not reusableTokenStream - return tokenStream(fieldName, reader); - } Analyzer analyzer = analyzerMap.get(fieldName); if (analyzer == null) analyzer = defaultAnalyzer; diff --git a/lucene/src/java/org/apache/lucene/analysis/PorterStemFilter.java b/lucene/src/java/org/apache/lucene/analysis/PorterStemFilter.java index 645ab9c577f..9f0d80a2a85 100644 --- a/lucene/src/java/org/apache/lucene/analysis/PorterStemFilter.java +++ b/lucene/src/java/org/apache/lucene/analysis/PorterStemFilter.java @@ -20,7 +20,7 @@ package org.apache.lucene.analysis; import java.io.IOException; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** Transforms the token stream as per the Porter stemming algorithm. Note: the input to the stemming filter must already be in lower case, @@ -47,15 +47,12 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;

*/ public final class PorterStemFilter extends TokenFilter { - private final PorterStemmer stemmer; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final PorterStemmer stemmer = new PorterStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public PorterStemFilter(TokenStream in) { super(in); - stemmer = new PorterStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } @Override @@ -63,8 +60,8 @@ public final class PorterStemFilter extends TokenFilter { if (!input.incrementToken()) return false; - if ((!keywordAttr.isKeyword()) && stemmer.stem(termAtt.termBuffer(), 0, termAtt.termLength())) - termAtt.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength()); + if ((!keywordAttr.isKeyword()) && stemmer.stem(termAtt.buffer(), 0, termAtt.length())) + termAtt.copyBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength()); return true; } } diff --git a/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java b/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java index 8dc5120c6a6..2c3986a6281 100644 --- a/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java +++ b/lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java @@ -100,8 +100,8 @@ public abstract class ReusableAnalyzerBase extends Analyzer { * {@link Analyzer#reusableTokenStream(String, Reader)}. */ public static class TokenStreamComponents { - final Tokenizer source; - final TokenStream sink; + protected final Tokenizer source; + protected final TokenStream sink; /** * Creates a new {@link TokenStreamComponents} instance. 
diff --git a/lucene/src/java/org/apache/lucene/analysis/StopFilter.java b/lucene/src/java/org/apache/lucene/analysis/StopFilter.java index ea457770e78..18d1a8a2db9 100644 --- a/lucene/src/java/org/apache/lucene/analysis/StopFilter.java +++ b/lucene/src/java/org/apache/lucene/analysis/StopFilter.java @@ -23,7 +23,7 @@ import java.util.Set; import java.util.List; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.queryParser.QueryParser; // for javadoc import org.apache.lucene.util.Version; @@ -44,8 +44,8 @@ public final class StopFilter extends TokenFilter { private final CharArraySet stopWords; private boolean enablePositionIncrements = false; - private TermAttribute termAtt; - private PositionIncrementAttribute posIncrAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); /** * Construct a token stream filtering the given input. @@ -104,8 +104,6 @@ public final class StopFilter extends TokenFilter { super(input); this.stopWords = stopWords instanceof CharArraySet ? 
(CharArraySet)stopWords : new CharArraySet(matchVersion, stopWords, ignoreCase); this.enablePositionIncrements = enablePositionIncrements; - termAtt = addAttribute(TermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); } /** @@ -257,7 +255,7 @@ public final class StopFilter extends TokenFilter { // return the first non-stop word found int skippedPositions = 0; while (input.incrementToken()) { - if (!stopWords.contains(termAtt.termBuffer(), 0, termAtt.termLength())) { + if (!stopWords.contains(termAtt.buffer(), 0, termAtt.length())) { if (enablePositionIncrements) { posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); } diff --git a/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java b/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java index d9058c4dac3..a09ce1ff629 100644 --- a/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java +++ b/lucene/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java @@ -42,8 +42,12 @@ import java.util.Set; * are corrected (see LUCENE-1068) * */ -public class StandardAnalyzer extends Analyzer { - private Set stopSet; +public final class StandardAnalyzer extends StopwordAnalyzerBase { + + /** Default maximum allowed token length */ + public static final int DEFAULT_MAX_TOKEN_LENGTH = 255; + + private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; /** * Specifies whether deprecated acronyms should be replaced with HOST type. @@ -54,7 +58,15 @@ public class StandardAnalyzer extends Analyzer { /** An unmodifiable set containing some common English words that are usually not useful for searching. */ public static final Set STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; - private final Version matchVersion; + + /** Builds an analyzer with the given stop words. 
+ * @param matchVersion Lucene version to match See {@link + * <a href="#version">above</a>} + * @param stopWords stop words */ + public StandardAnalyzer(Version matchVersion, Set stopWords) { + super(matchVersion, stopWords); + replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24); + } /** Builds an analyzer with the default stop words ({@link * #STOP_WORDS_SET}). @@ -65,16 +77,6 @@ public class StandardAnalyzer extends Analyzer { this(matchVersion, STOP_WORDS_SET); } - /** Builds an analyzer with the given stop words. - * @param matchVersion Lucene version to match See {@link - * <a href="#version">above</a>} - * @param stopWords stop words */ - public StandardAnalyzer(Version matchVersion, Set stopWords) { - stopSet = stopWords; - replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24); - this.matchVersion = matchVersion; - } - /** Builds an analyzer with the stop words from the given file. * @see WordlistLoader#getWordSet(File) * @param matchVersion Lucene version to match See {@link @@ -93,28 +95,6 @@ public class StandardAnalyzer extends Analyzer { this(matchVersion, WordlistLoader.getWordSet(stopwords)); } - /** Constructs a {@link StandardTokenizer} filtered by a {@link - StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */ - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader); - tokenStream.setMaxTokenLength(maxTokenLength); - TokenStream result = new StandardFilter(tokenStream); - result = new LowerCaseFilter(matchVersion, result); - result = new StopFilter(matchVersion, result, stopSet); - return result; - } - - private static final class SavedStreams { - StandardTokenizer tokenStream; - TokenStream filteredTokenStream; - } - - /** Default maximum allowed token length */ - public static final int DEFAULT_MAX_TOKEN_LENGTH = 255; - - private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; - /** * Set maximum allowed token length. 
If a token is seen * that exceeds this length then it is discarded. This @@ -133,29 +113,19 @@ public class StandardAnalyzer extends Analyzer { } @Override - public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - if (overridesTokenStreamMethod) { - // LUCENE-1678: force fallback to tokenStream() if we - // have been subclassed and that subclass overrides - // tokenStream but not reusableTokenStream - return tokenStream(fieldName, reader); - } - SavedStreams streams = (SavedStreams) getPreviousTokenStream(); - if (streams == null) { - streams = new SavedStreams(); - setPreviousTokenStream(streams); - streams.tokenStream = new StandardTokenizer(matchVersion, reader); - streams.filteredTokenStream = new StandardFilter(streams.tokenStream); - streams.filteredTokenStream = new LowerCaseFilter(matchVersion, - streams.filteredTokenStream); - streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopSet); - } else { - streams.tokenStream.reset(reader); - } - streams.tokenStream.setMaxTokenLength(maxTokenLength); - - streams.tokenStream.setReplaceInvalidAcronym(replaceInvalidAcronym); - - return streams.filteredTokenStream; + protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { + final StandardTokenizer src = new StandardTokenizer(matchVersion, reader); + src.setMaxTokenLength(maxTokenLength); + src.setReplaceInvalidAcronym(replaceInvalidAcronym); + TokenStream tok = new StandardFilter(src); + tok = new LowerCaseFilter(matchVersion, tok); + tok = new StopFilter(matchVersion, tok, stopwords); + return new TokenStreamComponents(src, tok) { + @Override + protected boolean reset(final Reader reader) throws IOException { + src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength); + return super.reset(reader); + } + }; } } diff --git a/lucene/src/java/org/apache/lucene/analysis/standard/StandardFilter.java 
b/lucene/src/java/org/apache/lucene/analysis/standard/StandardFilter.java index 714b9d0178a..b6394e523ab 100644 --- a/lucene/src/java/org/apache/lucene/analysis/standard/StandardFilter.java +++ b/lucene/src/java/org/apache/lucene/analysis/standard/StandardFilter.java @@ -19,27 +19,24 @@ package org.apache.lucene.analysis.standard; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; /** Normalizes tokens extracted with {@link StandardTokenizer}. */ public final class StandardFilter extends TokenFilter { - /** Construct filtering in. */ public StandardFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } private static final String APOSTROPHE_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.APOSTROPHE]; private static final String ACRONYM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ACRONYM]; // this filters uses attribute type - private final TypeAttribute typeAtt; - private final TermAttribute termAtt; + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** Returns the next token in the stream, or null at EOS. *

Removes 's from the end of words. @@ -51,16 +48,16 @@ public final class StandardFilter extends TokenFilter { return false; } - char[] buffer = termAtt.termBuffer(); - final int bufferLength = termAtt.termLength(); + final char[] buffer = termAtt.buffer(); + final int bufferLength = termAtt.length(); final String type = typeAtt.type(); if (type == APOSTROPHE_TYPE && // remove 's - bufferLength >= 2 && + bufferLength >= 2 && buffer[bufferLength-2] == '\'' && (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) { // Strip last 2 characters off - termAtt.setTermLength(bufferLength - 2); + termAtt.setLength(bufferLength - 2); } else if (type == ACRONYM_TYPE) { // remove dots int upto = 0; for(int i=0;i */ public final class CollationKeyFilter extends TokenFilter { - private Collator collator = null; - private TermAttribute termAtt; + private final Collator collator; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** * @param input Source token stream @@ -83,23 +83,18 @@ public final class CollationKeyFilter extends TokenFilter { public CollationKeyFilter(TokenStream input, Collator collator) { super(input); this.collator = collator; - termAtt = addAttribute(TermAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - char[] termBuffer = termAtt.termBuffer(); - String termText = new String(termBuffer, 0, termAtt.termLength()); - byte[] collationKey = collator.getCollationKey(termText).toByteArray(); + byte[] collationKey = collator.getCollationKey(termAtt.toString()).toByteArray(); int encodedLength = IndexableBinaryStringTools.getEncodedLength( collationKey, 0, collationKey.length); - if (encodedLength > termBuffer.length) { - termAtt.resizeTermBuffer(encodedLength); - } - termAtt.setTermLength(encodedLength); + termAtt.resizeBuffer(encodedLength); + termAtt.setLength(encodedLength); IndexableBinaryStringTools.encode(collationKey, 0, collationKey.length, - 
termAtt.termBuffer(), 0, encodedLength); + termAtt.buffer(), 0, encodedLength); return true; } else { return false; diff --git a/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java b/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java index 37e4076fcfe..238833ce2bd 100644 --- a/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java +++ b/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java @@ -17,7 +17,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.DateField; import org.apache.lucene.document.DateTools; import org.apache.lucene.index.Term; @@ -522,7 +522,7 @@ public class QueryParser implements QueryParserConstants { source = analyzer.tokenStream(field, new StringReader(queryText)); } CachingTokenFilter buffer = new CachingTokenFilter(source); - TermAttribute termAtt = null; + CharTermAttribute termAtt = null; PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; @@ -534,8 +534,8 @@ public class QueryParser implements QueryParserConstants { // success==false if we hit an exception } if (success) { - if (buffer.hasAttribute(TermAttribute.class)) { - termAtt = buffer.getAttribute(TermAttribute.class); + if (buffer.hasAttribute(CharTermAttribute.class)) { + termAtt = buffer.getAttribute(CharTermAttribute.class); } if (buffer.hasAttribute(PositionIncrementAttribute.class)) { posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); @@ -581,7 +581,7 @@ public class QueryParser implements QueryParserConstants { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.term(); + term = termAtt.toString(); } catch (IOException e) { // 
safe to ignore, because we know the number of tokens } @@ -596,7 +596,7 @@ public class QueryParser implements QueryParserConstants { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.term(); + term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } @@ -619,7 +619,7 @@ public class QueryParser implements QueryParserConstants { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.term(); + term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } @@ -659,7 +659,7 @@ public class QueryParser implements QueryParserConstants { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.term(); + term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } diff --git a/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj b/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj index ad58c926e20..1784114a4b8 100644 --- a/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj +++ b/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj @@ -41,7 +41,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.DateField; import org.apache.lucene.document.DateTools; import org.apache.lucene.index.Term; @@ -546,7 +546,7 @@ public class QueryParser { source = analyzer.tokenStream(field, new StringReader(queryText)); } CachingTokenFilter buffer = new CachingTokenFilter(source); - TermAttribute termAtt = null; + CharTermAttribute termAtt = null; 
PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; @@ -558,8 +558,8 @@ public class QueryParser { // success==false if we hit an exception } if (success) { - if (buffer.hasAttribute(TermAttribute.class)) { - termAtt = buffer.getAttribute(TermAttribute.class); + if (buffer.hasAttribute(CharTermAttribute.class)) { + termAtt = buffer.getAttribute(CharTermAttribute.class); } if (buffer.hasAttribute(PositionIncrementAttribute.class)) { posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); @@ -605,7 +605,7 @@ public class QueryParser { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.term(); + term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } @@ -620,7 +620,7 @@ public class QueryParser { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.term(); + term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } @@ -643,7 +643,7 @@ public class QueryParser { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.term(); + term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } @@ -683,7 +683,7 @@ public class QueryParser { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.term(); + term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } diff --git a/lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java b/lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java index 10d9243a2ac..5443eea46c3 100644 --- a/lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java +++ b/lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java @@ -15,7 +15,7 @@ import org.apache.lucene.analysis.Analyzer; import 
org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.DateField; import org.apache.lucene.document.DateTools; import org.apache.lucene.index.Term; diff --git a/lucene/src/java/org/apache/lucene/search/QueryTermVector.java b/lucene/src/java/org/apache/lucene/search/QueryTermVector.java index 3334c5f203d..fcb9f37f048 100644 --- a/lucene/src/java/org/apache/lucene/search/QueryTermVector.java +++ b/lucene/src/java/org/apache/lucene/search/QueryTermVector.java @@ -28,7 +28,7 @@ import java.util.Map; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.TermFreqVector; /** @@ -61,11 +61,11 @@ public class QueryTermVector implements TermFreqVector { boolean hasMoreTokens = false; stream.reset(); - TermAttribute termAtt = stream.addAttribute(TermAttribute.class); + final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); hasMoreTokens = stream.incrementToken(); while (hasMoreTokens) { - terms.add(termAtt.term()); + terms.add(termAtt.toString()); hasMoreTokens = stream.incrementToken(); } processTerms(terms.toArray(new String[terms.size()])); diff --git a/lucene/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java index 271a92eea1a..f6a06e7f695 100644 --- a/lucene/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java +++ b/lucene/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java @@ -83,8 +83,8 @@ public abstract class BaseTokenStreamTestCase extends 
LuceneTestCase { assertNotNull(output); CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class); - assertTrue("has no TermAttribute", ts.hasAttribute(TermAttribute.class)); - TermAttribute termAtt = ts.getAttribute(TermAttribute.class); + assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class)); + CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = null; if (startOffsets != null || endOffsets != null || finalOffset != null) { @@ -108,7 +108,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { for (int i = 0; i < output.length; i++) { // extra safety to enforce, that the state is not preserved and also assign bogus values ts.clearAttributes(); - termAtt.setTermBuffer("bogusTerm"); + termAtt.setEmpty().append("bogusTerm"); if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243); if (typeAtt != null) typeAtt.setType("bogusType"); if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657); @@ -117,7 +117,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase { assertTrue("token "+i+" does not exist", ts.incrementToken()); assertTrue("clearAttributes() was not called correctly in TokenStream chain", checkClearAtt.getAndResetClearCalled()); - assertEquals("term "+i, output[i], termAtt.term()); + assertEquals("term "+i, output[i], termAtt.toString()); if (startOffsets != null) assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset()); if (endOffsets != null) diff --git a/lucene/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java b/lucene/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java index a04408ab46d..103c06976eb 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestASCIIFoldingFilter.java @@ -17,7 +17,7 @@ package org.apache.lucene.analysis; * limitations under the License. 
*/ -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import java.io.StringReader; import java.util.List; import java.util.ArrayList; @@ -33,7 +33,7 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase { +" ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl")); ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); assertTermEquals("Des", filter, termAtt); assertTermEquals("mot", filter, termAtt); @@ -1890,7 +1890,7 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase { TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(inputText.toString())); ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); Iterator expectedIter = expectedOutputTokens.iterator(); while (expectedIter.hasNext()) { assertTermEquals(expectedIter.next(), filter, termAtt); @@ -1898,8 +1898,8 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase { assertFalse(filter.incrementToken()); } - void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt) throws Exception { + void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt) throws Exception { assertTrue(stream.incrementToken()); - assertEquals(expected, termAtt.term()); + assertEquals(expected, termAtt.toString()); } } diff --git a/lucene/src/test/org/apache/lucene/analysis/TestAnalyzers.java b/lucene/src/test/org/apache/lucene/analysis/TestAnalyzers.java index 06b1bb1bba8..ca8a88aaa9f 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestAnalyzers.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestAnalyzers.java @@ -24,7 +24,7 @@ 
import java.io.Reader; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.Payload; public class TestAnalyzers extends BaseTokenStreamTestCase { @@ -120,26 +120,6 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { String[] y = StandardTokenizer.TOKEN_TYPES; } - private static class MyStandardAnalyzer extends StandardAnalyzer { - public MyStandardAnalyzer() { - super(TEST_VERSION_CURRENT); - } - - @Override - public TokenStream tokenStream(String field, Reader reader) { - return new WhitespaceAnalyzer(TEST_VERSION_CURRENT).tokenStream(field, reader); - } - } - - public void testSubclassOverridingOnlyTokenStream() throws Throwable { - Analyzer a = new MyStandardAnalyzer(); - TokenStream ts = a.reusableTokenStream("field", new StringReader("the")); - // StandardAnalyzer will discard "the" (it's a - // stopword), by my subclass will not: - assertTrue(ts.incrementToken()); - assertFalse(ts.incrementToken()); - } - private static class LowerCaseWhitespaceAnalyzer extends Analyzer { @Override @@ -202,8 +182,8 @@ public class TestAnalyzers extends BaseTokenStreamTestCase { String highSurEndingLower = "bogustermboguster\ud801"; tokenizer.reset(new StringReader(highSurEndingUpper)); assertTokenStreamContents(filter, new String[] {highSurEndingLower}); - assertTrue(filter.hasAttribute(TermAttribute.class)); - char[] termBuffer = filter.getAttribute(TermAttribute.class).termBuffer(); + assertTrue(filter.hasAttribute(CharTermAttribute.class)); + char[] termBuffer = filter.getAttribute(CharTermAttribute.class).buffer(); int length = highSurEndingLower.length(); assertEquals('\ud801', termBuffer[length - 1]); assertEquals('\udc3e', termBuffer[length]); diff --git 
a/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java b/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java index 649943a4f23..52418877d2c 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java @@ -21,7 +21,7 @@ package org.apache.lucene.analysis; import java.io.IOException; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.TermVector; @@ -43,7 +43,7 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase { Document doc = new Document(); TokenStream stream = new TokenStream() { private int index = 0; - private TermAttribute termAtt = addAttribute(TermAttribute.class); + private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); @Override @@ -52,7 +52,7 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase { return false; } else { clearAttributes(); - termAtt.setTermBuffer(tokens[index++]); + termAtt.append(tokens[index++]); offsetAtt.setOffset(0,0); return true; } @@ -100,11 +100,10 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase { private void checkTokens(TokenStream stream) throws IOException { int count = 0; - TermAttribute termAtt = stream.getAttribute(TermAttribute.class); - assertNotNull(termAtt); + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); while (stream.incrementToken()) { assertTrue(count < tokens.length); - assertEquals(tokens[count], termAtt.term()); + assertEquals(tokens[count], termAtt.toString()); count++; } diff --git 
a/lucene/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java b/lucene/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java index e424ccc06f6..a7d2b953bce 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java @@ -17,14 +17,14 @@ package org.apache.lucene.analysis; * limitations under the License. */ -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import java.io.StringReader; public class TestISOLatin1AccentFilter extends BaseTokenStreamTestCase { public void testU() throws Exception { TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl")); ISOLatin1AccentFilter filter = new ISOLatin1AccentFilter(stream); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); assertTermEquals("Des", filter, termAtt); assertTermEquals("mot", filter, termAtt); assertTermEquals("cles", filter, termAtt); @@ -103,8 +103,8 @@ public class TestISOLatin1AccentFilter extends BaseTokenStreamTestCase { assertFalse(filter.incrementToken()); } - void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt) throws Exception { + void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt) throws Exception { assertTrue(stream.incrementToken()); - assertEquals(expected, termAtt.term()); + assertEquals(expected, termAtt.toString()); } } diff --git a/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerTokenFilter.java b/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerTokenFilter.java index 9826c38c4ff..147dc0d4b9f 100644 --- 
a/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerTokenFilter.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestKeywordMarkerTokenFilter.java @@ -6,7 +6,7 @@ import java.util.HashSet; import java.util.Set; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.junit.Test; /** @@ -53,20 +53,20 @@ public class TestKeywordMarkerTokenFilter extends BaseTokenStreamTestCase { public static class LowerCaseFilterMock extends TokenFilter { - private TermAttribute termAtt; - private KeywordAttribute keywordAttr; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public LowerCaseFilterMock(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - if (!keywordAttr.isKeyword()) - termAtt.setTermBuffer(termAtt.term().toLowerCase()); + if (!keywordAttr.isKeyword()) { + final String term = termAtt.toString().toLowerCase(); + termAtt.setEmpty().append(term); + } return true; } return false; diff --git a/lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java b/lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java index 94f4a9570a3..d671bff7dd8 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java @@ -17,7 +17,7 @@ package org.apache.lucene.analysis; * limitations under the License. 
*/ -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import java.io.StringReader; public class TestLengthFilter extends BaseTokenStreamTestCase { @@ -26,14 +26,14 @@ public class TestLengthFilter extends BaseTokenStreamTestCase { TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("short toolong evenmuchlongertext a ab toolong foo")); LengthFilter filter = new LengthFilter(stream, 2, 6); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); assertTrue(filter.incrementToken()); - assertEquals("short", termAtt.term()); + assertEquals("short", termAtt.toString()); assertTrue(filter.incrementToken()); - assertEquals("ab", termAtt.term()); + assertEquals("ab", termAtt.toString()); assertTrue(filter.incrementToken()); - assertEquals("foo", termAtt.term()); + assertEquals("foo", termAtt.toString()); assertFalse(filter.incrementToken()); } diff --git a/lucene/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java b/lucene/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java index be5fcd62ec7..790e6e8eb33 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java @@ -2,7 +2,7 @@ package org.apache.lucene.analysis; import java.io.StringReader; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -30,19 +30,19 @@ public class TestPerFieldAnalzyerWrapper extends BaseTokenStreamTestCase { TokenStream tokenStream = analyzer.tokenStream("field", new StringReader(text)); - TermAttribute termAtt = tokenStream.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = 
tokenStream.getAttribute(CharTermAttribute.class); assertTrue(tokenStream.incrementToken()); assertEquals("WhitespaceAnalyzer does not lowercase", "Qwerty", - termAtt.term()); + termAtt.toString()); tokenStream = analyzer.tokenStream("special", new StringReader(text)); - termAtt = tokenStream.getAttribute(TermAttribute.class); + termAtt = tokenStream.getAttribute(CharTermAttribute.class); assertTrue(tokenStream.incrementToken()); assertEquals("SimpleAnalyzer lowercases", "qwerty", - termAtt.term()); + termAtt.toString()); } } diff --git a/lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java b/lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java index 18e84305342..4eb35df33f7 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java @@ -18,7 +18,7 @@ package org.apache.lucene.analysis; */ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.Version; import java.io.StringReader; @@ -51,10 +51,10 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase { StringReader reader = new StringReader("This is a test of the english stop analyzer"); TokenStream stream = stop.tokenStream("test", reader); assertTrue(stream != null); - TermAttribute termAtt = stream.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); while (stream.incrementToken()) { - assertFalse(inValidTokens.contains(termAtt.term())); + assertFalse(inValidTokens.contains(termAtt.toString())); } } @@ -67,11 +67,11 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase { StringReader reader = new StringReader("This is a good test of the english stop analyzer"); TokenStream stream = newStop.tokenStream("test", reader); 
assertNotNull(stream); - TermAttribute termAtt = stream.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class); while (stream.incrementToken()) { - String text = termAtt.term(); + String text = termAtt.toString(); assertFalse(stopWordsSet.contains(text)); assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments. } @@ -88,11 +88,11 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase { TokenStream stream = newStop.tokenStream("test", reader); assertNotNull(stream); int i = 0; - TermAttribute termAtt = stream.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class); while (stream.incrementToken()) { - String text = termAtt.term(); + String text = termAtt.toString(); assertFalse(stopWordsSet.contains(text)); assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement()); } diff --git a/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java b/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java index 5415a745f87..ec989a51817 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestStopFilter.java @@ -17,7 +17,7 @@ package org.apache.lucene.analysis; */ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.English; import org.apache.lucene.util.Version; @@ -37,11 +37,11 @@ public class TestStopFilter extends BaseTokenStreamTestCase { StringReader reader = new StringReader("Now is The Time"); Set stopWords = new HashSet(Arrays.asList("is", 
"the", "Time")); TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, false); - final TermAttribute termAtt = stream.getAttribute(TermAttribute.class); + final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); assertTrue(stream.incrementToken()); - assertEquals("Now", termAtt.term()); + assertEquals("Now", termAtt.toString()); assertTrue(stream.incrementToken()); - assertEquals("The", termAtt.term()); + assertEquals("The", termAtt.toString()); assertFalse(stream.incrementToken()); } @@ -49,9 +49,9 @@ public class TestStopFilter extends BaseTokenStreamTestCase { StringReader reader = new StringReader("Now is The Time"); Set stopWords = new HashSet(Arrays.asList( "is", "the", "Time" )); TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, true); - final TermAttribute termAtt = stream.getAttribute(TermAttribute.class); + final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); assertTrue(stream.incrementToken()); - assertEquals("Now", termAtt.term()); + assertEquals("Now", termAtt.toString()); assertFalse(stream.incrementToken()); } @@ -60,11 +60,11 @@ public class TestStopFilter extends BaseTokenStreamTestCase { String[] stopWords = new String[] { "is", "the", "Time" }; Set stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords); TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet); - final TermAttribute termAtt = stream.getAttribute(TermAttribute.class); + final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); assertTrue(stream.incrementToken()); - assertEquals("Now", termAtt.term()); + assertEquals("Now", termAtt.toString()); assertTrue(stream.incrementToken()); - assertEquals("The", termAtt.term()); + assertEquals("The", termAtt.toString()); 
assertFalse(stream.incrementToken()); } @@ -117,13 +117,13 @@ public class TestStopFilter extends BaseTokenStreamTestCase { private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws IOException { log("---> test with enable-increments-"+(enableIcrements?"enabled":"disabled")); stpf.setEnablePositionIncrements(enableIcrements); - TermAttribute termAtt = stpf.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class); for (int i=0; i<20; i+=3) { assertTrue(stpf.incrementToken()); log("Token "+i+": "+stpf); String w = English.intToEnglish(i).trim(); - assertEquals("expecting token "+i+" to be "+w,w,termAtt.term()); + assertEquals("expecting token "+i+" to be "+w,w,termAtt.toString()); assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement()); } assertFalse(stpf.incrementToken()); diff --git a/lucene/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java b/lucene/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java index cc80f019f0c..c2b3249262f 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java @@ -19,7 +19,7 @@ package org.apache.lucene.analysis; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.English; import java.io.IOException; @@ -59,16 +59,16 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase { static final 
TeeSinkTokenFilter.SinkFilter theFilter = new TeeSinkTokenFilter.SinkFilter() { @Override public boolean accept(AttributeSource a) { - TermAttribute termAtt = a.getAttribute(TermAttribute.class); - return termAtt.term().equalsIgnoreCase("The"); + CharTermAttribute termAtt = a.getAttribute(CharTermAttribute.class); + return termAtt.toString().equalsIgnoreCase("The"); } }; static final TeeSinkTokenFilter.SinkFilter dogFilter = new TeeSinkTokenFilter.SinkFilter() { @Override public boolean accept(AttributeSource a) { - TermAttribute termAtt = a.getAttribute(TermAttribute.class); - return termAtt.term().equalsIgnoreCase("Dogs"); + CharTermAttribute termAtt = a.getAttribute(CharTermAttribute.class); + return termAtt.toString().equalsIgnoreCase("Dogs"); } }; @@ -135,8 +135,8 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase { TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100)); teeStream.consumeAllTokens(); TokenStream stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))), 100); - TermAttribute tfTok = stream.addAttribute(TermAttribute.class); - TermAttribute sinkTok = sink.addAttribute(TermAttribute.class); + CharTermAttribute tfTok = stream.addAttribute(CharTermAttribute.class); + CharTermAttribute sinkTok = sink.addAttribute(CharTermAttribute.class); for (int i=0; stream.incrementToken(); i++) { assertTrue(sink.incrementToken()); assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.equals(sinkTok) == true); diff --git a/lucene/src/test/org/apache/lucene/analysis/TestToken.java b/lucene/src/test/org/apache/lucene/analysis/TestToken.java index b3cb3a7f2f1..be5f6116497 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestToken.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestToken.java @@ -244,8 +244,8 @@ public class TestToken extends LuceneTestCase { assertTrue("TypeAttribute is not implemented by 
SenselessAttributeImpl", ts.addAttribute(SenselessAttribute.class) instanceof SenselessAttributeImpl); - assertTrue("TermAttribute is not implemented by Token", - ts.addAttribute(TermAttribute.class) instanceof Token); + assertTrue("CharTermAttribute is not implemented by Token", + ts.addAttribute(CharTermAttribute.class) instanceof Token); assertTrue("OffsetAttribute is not implemented by Token", ts.addAttribute(OffsetAttribute.class) instanceof Token); assertTrue("FlagsAttribute is not implemented by Token", diff --git a/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java b/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java index a0091bc029f..1cc6f33697c 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; @@ -152,15 +152,15 @@ public class TestDocumentWriter extends LuceneTestCase { restoreState(state); payloadAtt.setPayload(null); posIncrAtt.setPositionIncrement(0); - termAtt.setTermBuffer(new char[]{'b'}, 0, 1); + termAtt.setEmpty().append("b"); state = null; return true; } boolean hasNext = input.incrementToken(); if (!hasNext) return false; - if (Character.isDigit(termAtt.termBuffer()[0])) { - posIncrAtt.setPositionIncrement(termAtt.termBuffer()[0] - '0'); + if (Character.isDigit(termAtt.buffer()[0])) { + posIncrAtt.setPositionIncrement(termAtt.buffer()[0] - '0'); } if (first) { // set payload on first 
position only @@ -174,7 +174,7 @@ public class TestDocumentWriter extends LuceneTestCase { } - TermAttribute termAtt = addAttribute(TermAttribute.class); + CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); }; @@ -215,7 +215,7 @@ public class TestDocumentWriter extends LuceneTestCase { private String[] tokens = new String[] {"term1", "term2", "term3", "term2"}; private int index = 0; - private TermAttribute termAtt = addAttribute(TermAttribute.class); + private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); @Override public boolean incrementToken() throws IOException { @@ -223,7 +223,7 @@ public class TestDocumentWriter extends LuceneTestCase { return false; } else { clearAttributes(); - termAtt.setTermBuffer(tokens[index++]); + termAtt.setEmpty().append(tokens[index++]); return true; } } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java index fbee06398de..443b90bdb9a 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -46,7 +46,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardTokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -3482,7 +3482,7 @@ public class TestIndexWriter extends LuceneTestCase { // LUCENE-1255 public void testNegativePositions() throws Throwable { 
final TokenStream tokens = new TokenStream() { - final TermAttribute termAtt = addAttribute(TermAttribute.class); + final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); final Iterator terms = Arrays.asList("a","b","c").iterator(); @@ -3492,7 +3492,7 @@ public class TestIndexWriter extends LuceneTestCase { public boolean incrementToken() { if (!terms.hasNext()) return false; clearAttributes(); - termAtt.setTermBuffer( terms.next()); + termAtt.append(terms.next()); posIncrAtt.setPositionIncrement(first ? 0 : 1); first = false; return true; diff --git a/lucene/src/test/org/apache/lucene/index/TestPayloads.java b/lucene/src/test/org/apache/lucene/index/TestPayloads.java index 3c395a22b1b..959a786d725 100644 --- a/lucene/src/test/org/apache/lucene/index/TestPayloads.java +++ b/lucene/src/test/org/apache/lucene/index/TestPayloads.java @@ -33,7 +33,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -538,7 +538,7 @@ public class TestPayloads extends LuceneTestCase { private ByteArrayPool pool; private String term; - TermAttribute termAtt; + CharTermAttribute termAtt; PayloadAttribute payloadAtt; PoolingPayloadTokenStream(ByteArrayPool pool) { @@ -548,7 +548,7 @@ public class TestPayloads extends LuceneTestCase { term = pool.bytesToString(payload); first = true; payloadAtt = addAttribute(PayloadAttribute.class); - termAtt = addAttribute(TermAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); } 
@Override @@ -556,7 +556,7 @@ public class TestPayloads extends LuceneTestCase { if (!first) return false; first = false; clearAttributes(); - termAtt.setTermBuffer(term); + termAtt.append(term); payloadAtt.setPayload(new Payload(payload)); return true; } diff --git a/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java b/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java index 8c89c0fa8e7..95f80360832 100644 --- a/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.store.MockRAMDirectory; @@ -123,12 +123,12 @@ public class TestTermVectorsReader extends LuceneTestCase { private class MyTokenStream extends TokenStream { int tokenUpto; - TermAttribute termAtt; + CharTermAttribute termAtt; PositionIncrementAttribute posIncrAtt; OffsetAttribute offsetAtt; public MyTokenStream() { - termAtt = addAttribute(TermAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); } @@ -140,7 +140,7 @@ public class TestTermVectorsReader extends LuceneTestCase { else { final TestToken testToken = tokens[tokenUpto++]; clearAttributes(); - termAtt.setTermBuffer(testToken.text); + termAtt.append(testToken.text); offsetAtt.setOffset(testToken.startOffset, testToken.endOffset); if (tokenUpto > 1) { posIncrAtt.setPositionIncrement(testToken.pos - 
tokens[tokenUpto-2].pos); diff --git a/lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java b/lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java index 4ce7eb91f02..bfdcbb62279 100644 --- a/lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java +++ b/lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java @@ -23,7 +23,7 @@ import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -33,12 +33,12 @@ import org.apache.lucene.util.LuceneTestCase; class RepeatingTokenStream extends TokenStream { public int num; - TermAttribute termAtt; + CharTermAttribute termAtt; String value; public RepeatingTokenStream(String val) { this.value = val; - this.termAtt = addAttribute(TermAttribute.class); + this.termAtt = addAttribute(CharTermAttribute.class); } @Override @@ -46,7 +46,7 @@ class RepeatingTokenStream extends TokenStream { num--; if (num >= 0) { clearAttributes(); - termAtt.setTermBuffer(value); + termAtt.append(value); return true; } return false; diff --git a/lucene/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java b/lucene/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java index be1af8c865a..8a6ee098acd 100644 --- a/lucene/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java +++ b/lucene/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import 
org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.search.Query; import org.apache.lucene.analysis.BaseTokenStreamTestCase; @@ -148,14 +148,14 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase { private int prevStartOffset; private int prevEndOffset; - TermAttribute termAtt; + CharTermAttribute termAtt; PositionIncrementAttribute posIncrAtt; OffsetAttribute offsetAtt; TypeAttribute typeAtt; public TestFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); typeAtt = addAttribute(TypeAttribute.class); @@ -164,7 +164,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase { @Override public final boolean incrementToken() throws java.io.IOException { if (multiToken > 0) { - termAtt.setTermBuffer("multi"+(multiToken+1)); + termAtt.setEmpty().append("multi"+(multiToken+1)); offsetAtt.setOffset(prevStartOffset, prevEndOffset); typeAtt.setType(prevType); posIncrAtt.setPositionIncrement(0); @@ -178,7 +178,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase { prevType = typeAtt.type(); prevStartOffset = offsetAtt.startOffset(); prevEndOffset = offsetAtt.endOffset(); - String text = termAtt.term(); + String text = termAtt.toString(); if (text.equals("triplemulti")) { multiToken = 2; return true; @@ -212,21 +212,21 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase { private final class TestPosIncrementFilter extends TokenFilter { - TermAttribute termAtt; + CharTermAttribute termAtt; PositionIncrementAttribute posIncrAtt; public TestPosIncrementFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); + termAtt = 
addAttribute(CharTermAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); } @Override public final boolean incrementToken () throws java.io.IOException { while(input.incrementToken()) { - if (termAtt.term().equals("the")) { + if (termAtt.toString().equals("the")) { // stopword, do nothing - } else if (termAtt.term().equals("quick")) { + } else if (termAtt.toString().equals("quick")) { posIncrAtt.setPositionIncrement(2); return true; } else { diff --git a/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java b/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java index da7b46813c5..4c8a4c7d1d4 100644 --- a/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java +++ b/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java @@ -40,7 +40,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.DateField; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Document; @@ -82,7 +82,7 @@ public class TestQueryParser extends LocalizedTestCase { public static Analyzer qpAnalyzer = new QPTestAnalyzer(); public static class QPTestFilter extends TokenFilter { - TermAttribute termAtt; + CharTermAttribute termAtt; OffsetAttribute offsetAtt; /** @@ -91,7 +91,7 @@ public class TestQueryParser extends LocalizedTestCase { */ public QPTestFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); } @@ -103,19 +103,19 @@ public class TestQueryParser extends LocalizedTestCase { if (inPhrase) { inPhrase = false; 
clearAttributes(); - termAtt.setTermBuffer("phrase2"); + termAtt.append("phrase2"); offsetAtt.setOffset(savedStart, savedEnd); return true; } else while (input.incrementToken()) { - if (termAtt.term().equals("phrase")) { + if (termAtt.toString().equals("phrase")) { inPhrase = true; savedStart = offsetAtt.startOffset(); savedEnd = offsetAtt.endOffset(); - termAtt.setTermBuffer("phrase1"); + termAtt.setEmpty().append("phrase1"); offsetAtt.setOffset(savedStart, savedEnd); return true; - } else if (!termAtt.term().equals("stop")) + } else if (!termAtt.toString().equals("stop")) return true; } return false; diff --git a/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java index 2a525718664..8d94af4e6ff 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ b/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -74,7 +74,7 @@ public class TestPositionIncrement extends LuceneTestCase { private int i = 0; PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - TermAttribute termAtt = addAttribute(TermAttribute.class); + CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); @Override @@ -82,7 +82,7 @@ public class TestPositionIncrement extends LuceneTestCase { if (i == TOKENS.length) return 
false; clearAttributes(); - termAtt.setTermBuffer(TOKENS[i]); + termAtt.append(TOKENS[i]); offsetAtt.setOffset(i,i); posIncrAtt.setPositionIncrement(INCREMENTS[i]); i++; @@ -347,7 +347,7 @@ class PayloadFilter extends TokenFilter { final PositionIncrementAttribute posIncrAttr; final PayloadAttribute payloadAttr; - final TermAttribute termAttr; + final CharTermAttribute termAttr; public PayloadFilter(TokenStream input, String fieldName) { super(input); @@ -356,7 +356,7 @@ class PayloadFilter extends TokenFilter { i = 0; posIncrAttr = input.addAttribute(PositionIncrementAttribute.class); payloadAttr = input.addAttribute(PayloadAttribute.class); - termAttr = input.addAttribute(TermAttribute.class); + termAttr = input.addAttribute(CharTermAttribute.class); } @Override @@ -372,7 +372,7 @@ class PayloadFilter extends TokenFilter { posIncrAttr.setPositionIncrement(posIncr); pos += posIncr; if (TestPositionIncrement.VERBOSE) { - System.out.println("term=" + termAttr.term() + " pos=" + pos); + System.out.println("term=" + termAttr + " pos=" + pos); } i++; return true; diff --git a/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java b/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java index a75831a2d83..f8387dcbbe0 100644 --- a/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java @@ -27,7 +27,7 @@ import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.LuceneTestCase; import java.io.IOException; @@ -280,27 +280,25 @@ public class TestTermRangeQuery extends LuceneTestCase { private static class SingleCharTokenizer extends Tokenizer { char[] buffer = new char[1]; - boolean done; - 
TermAttribute termAtt; + boolean done = false; + CharTermAttribute termAtt; public SingleCharTokenizer(Reader r) { super(r); - termAtt = addAttribute(TermAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); } @Override public boolean incrementToken() throws IOException { - int count = input.read(buffer); if (done) return false; else { + int count = input.read(buffer); clearAttributes(); done = true; if (count == 1) { - termAtt.termBuffer()[0] = buffer[0]; - termAtt.setTermLength(1); - } else - termAtt.setTermLength(0); + termAtt.copyBuffer(buffer, 0, 1); + } return true; } } diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java b/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java index d548a72b9d0..44bf6e9fb11 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; @@ -479,7 +479,7 @@ public class TestPayloadSpans extends LuceneTestCase { Set nopayload = new HashSet(); int pos; PayloadAttribute payloadAtt; - TermAttribute termAtt; + CharTermAttribute termAtt; PositionIncrementAttribute posIncrAtt; public PayloadFilter(TokenStream input, String fieldName) { @@ -490,7 +490,7 @@ public class TestPayloadSpans extends LuceneTestCase { entities.add("one"); nopayload.add("nopayload"); nopayload.add("np"); - termAtt = addAttribute(TermAttribute.class); + termAtt = 
addAttribute(CharTermAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); payloadAtt = addAttribute(PayloadAttribute.class); } @@ -498,7 +498,7 @@ public class TestPayloadSpans extends LuceneTestCase { @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - String token = new String(termAtt.termBuffer(), 0, termAtt.termLength()); + String token = termAtt.toString(); if (!nopayload.contains(token)) { if (entities.contains(token)) {