mirror of https://github.com/apache/lucene.git
LUCENE-2372: Convert core analyzers to CharTermAttribute. Also made rest of core analyzers final.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@932749 13f79535-47bb-0310-9956-ffa450edef68
parent 06283e9f63
commit d02cbfe3c1
@@ -97,6 +97,10 @@ Changes in backwards compatibility policy
   TODO: Point to new attribute inspection API coming with LUCENE-2374.
   (Uwe Schindler, Robert Muir)
 
+* LUCENE-2372: StandardAnalyzer, KeywordAnalyzer, PerFieldAnalyzerWrapper
+  are now final. Also removed the now obsolete and deprecated
+  Analyzer.setOverridesTokenStreamMethod(). (Uwe Schindler)
+
 Changes in runtime behavior
 
 * LUCENE-1923: Made IndexReader.toString() produce something
@@ -120,6 +120,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
     String[] y = StandardTokenizer.TOKEN_TYPES;
   }
 
+  /* StandardAnalyzer was made final in 3.1:
   private static class MyStandardAnalyzer extends StandardAnalyzer {
     public MyStandardAnalyzer() {
       super(org.apache.lucene.util.Version.LUCENE_CURRENT);
@@ -139,6 +140,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
       assertTrue(ts.incrementToken());
       assertFalse(ts.incrementToken());
     }
+  */
 }
 
 class PayloadSetter extends TokenFilter {

@@ -19,7 +19,7 @@ package org.apache.lucene.analysis;
 
 import java.io.IOException;
 
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.RamUsageEstimator;
 
@@ -61,18 +61,17 @@ public final class ASCIIFoldingFilter extends TokenFilter {
   public ASCIIFoldingFilter(TokenStream input)
   {
     super(input);
-    termAtt = addAttribute(TermAttribute.class);
   }
 
   private char[] output = new char[512];
   private int outputPos;
-  private TermAttribute termAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      final char[] buffer = termAtt.termBuffer();
-      final int length = termAtt.termLength();
+      final char[] buffer = termAtt.buffer();
+      final int length = termAtt.length();
 
       // If no characters actually require rewriting then we
       // just return token as-is:
@@ -81,7 +80,7 @@ public final class ASCIIFoldingFilter extends TokenFilter {
       if (c >= '\u0080')
       {
         foldToASCII(buffer, length);
-        termAtt.setTermBuffer(output, 0, outputPos);
+        termAtt.copyBuffer(output, 0, outputPos);
         break;
       }
     }

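The ASCIIFoldingFilter hunk above is the template repeated throughout this commit: the attribute field moves from constructor wiring to a final field initialized at its declaration, and termBuffer()/termLength()/setTermBuffer() become buffer()/length()/copyBuffer(). A minimal sketch of the same conversion applied to a hypothetical filter (illustrative only, not part of this commit):

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public final class ExampleUpperCaseFilter extends TokenFilter {
      // declared and registered in one step; was termAtt = addAttribute(...) in the constructor
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

      public ExampleUpperCaseFilter(TokenStream input) {
        super(input);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) return false;
        final char[] buffer = termAtt.buffer();  // was termAtt.termBuffer()
        final int length = termAtt.length();     // was termAtt.termLength()
        for (int i = 0; i < length; i++) {
          buffer[i] = Character.toUpperCase(buffer[i]); // edits the term in place (BMP-only for brevity)
        }
        return true;
      }
    }
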
@@ -84,28 +84,6 @@ public abstract class Analyzer implements Closeable {
     }
   }
 
-  private static final VirtualMethod<Analyzer> tokenStreamMethod =
-    new VirtualMethod<Analyzer>(Analyzer.class, "tokenStream", String.class, Reader.class);
-  private static final VirtualMethod<Analyzer> reusableTokenStreamMethod =
-    new VirtualMethod<Analyzer>(Analyzer.class, "reusableTokenStream", String.class, Reader.class);
-
-  /** This field contains if the {@link #tokenStream} method was overridden in a
-   * more far away subclass of {@code Analyzer} on the current instance's inheritance path.
-   * If this field is {@code true}, {@link #reusableTokenStream} should delegate to {@link #tokenStream}
-   * instead of using the own implementation.
-   * @deprecated Please declare all implementations of {@link #reusableTokenStream} and {@link #tokenStream}
-   * as {@code final}.
-   */
-  @Deprecated
-  protected final boolean overridesTokenStreamMethod =
-    VirtualMethod.compareImplementationDistance(this.getClass(), tokenStreamMethod, reusableTokenStreamMethod) > 0;
-
-  /** @deprecated This is a no-op since Lucene 3.1. */
-  @Deprecated
-  protected void setOverridesTokenStreamMethod(Class<? extends Analyzer> baseClass) {
-  }
-
-
   /**
    * Invoked before indexing a Fieldable instance if
    * terms have already been added to that field. This allows custom

@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.CharacterUtils;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
@ -78,10 +78,7 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
public CharTokenizer(Version matchVersion, Reader input) {
|
||||
super(input);
|
||||
charUtils = CharacterUtils.getInstance(matchVersion);
|
||||
offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
termAtt = addAttribute(TermAttribute.class);
|
||||
useOldAPI = useOldAPI(matchVersion);
|
||||
ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
|
||||
|
||||
}
|
||||
|
||||
|
@ -99,10 +96,7 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
Reader input) {
|
||||
super(source, input);
|
||||
charUtils = CharacterUtils.getInstance(matchVersion);
|
||||
offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
termAtt = addAttribute(TermAttribute.class);
|
||||
useOldAPI = useOldAPI(matchVersion);
|
||||
ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -119,10 +113,7 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
Reader input) {
|
||||
super(factory, input);
|
||||
charUtils = CharacterUtils.getInstance(matchVersion);
|
||||
offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
termAtt = addAttribute(TermAttribute.class);
|
||||
useOldAPI = useOldAPI(matchVersion);
|
||||
ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -164,11 +155,11 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
private static final int MAX_WORD_LEN = 255;
|
||||
private static final int IO_BUFFER_SIZE = 4096;
|
||||
|
||||
private final TermAttribute termAtt;
|
||||
private final OffsetAttribute offsetAtt;
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);;
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
|
||||
private final CharacterUtils charUtils;
|
||||
private final CharacterBuffer ioBuffer;
|
||||
private final CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
|
||||
|
||||
/**
|
||||
* @deprecated this will be removed in lucene 4.0
|
||||
|
@ -275,7 +266,7 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
return incrementTokenOld();
|
||||
int length = 0;
|
||||
int start = bufferIndex;
|
||||
char[] buffer = termAtt.termBuffer();
|
||||
char[] buffer = termAtt.buffer();
|
||||
while (true) {
|
||||
if (bufferIndex >= dataLen) {
|
||||
offset += dataLen;
|
||||
|
@ -297,7 +288,7 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
if (length == 0) // start of token
|
||||
start = offset + bufferIndex - 1;
|
||||
else if (length >= buffer.length-1) // check if a supplementary could run out of bounds
|
||||
buffer = termAtt.resizeTermBuffer(2+length); // make sure a supplementary fits in the buffer
|
||||
buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer
|
||||
length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized
|
||||
if (length >= MAX_WORD_LEN) // buffer overflow! make sure to check for >= surrogate pair could break == test
|
||||
break;
|
||||
|
@ -305,7 +296,7 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
break; // return 'em
|
||||
}
|
||||
|
||||
termAtt.setTermLength(length);
|
||||
termAtt.setLength(length);
|
||||
offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
|
||||
return true;
|
||||
|
||||
|
@ -320,7 +311,7 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
private boolean incrementTokenOld() throws IOException {
|
||||
int length = 0;
|
||||
int start = bufferIndex;
|
||||
char[] buffer = termAtt.termBuffer();
|
||||
char[] buffer = termAtt.buffer();
|
||||
final char[] oldIoBuffer = ioBuffer.getBuffer();
|
||||
while (true) {
|
||||
|
||||
|
@ -344,7 +335,7 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
if (length == 0) // start of token
|
||||
start = offset + bufferIndex - 1;
|
||||
else if (length == buffer.length)
|
||||
buffer = termAtt.resizeTermBuffer(1+length);
|
||||
buffer = termAtt.resizeBuffer(1+length);
|
||||
|
||||
buffer[length++] = normalize(c); // buffer it, normalized
|
||||
|
||||
|
@ -355,7 +346,7 @@ public abstract class CharTokenizer extends Tokenizer {
|
|||
break; // return 'em
|
||||
}
|
||||
|
||||
termAtt.setTermLength(length);
|
||||
termAtt.setLength(length);
|
||||
offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
|
||||
return true;
|
||||
}
|
||||
|
|
|
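The resizeBuffer(2+length) call in the new-API path exists because a code point above the Basic Multilingual Plane expands to a surrogate pair, so one normalize(c) result can append two chars at once. A standalone sketch of that invariant (plain Java, not Lucene code):

    // A supplementary code point needs two chars in the term buffer.
    int codePoint = 0x1D11E;                    // MUSICAL SYMBOL G CLEF, outside the BMP
    char[] buffer = new char[4];
    int length = 3;                             // pretend the buffer is nearly full
    if (length >= buffer.length - 1) {          // same guard as incrementToken() above
      buffer = java.util.Arrays.copyOf(buffer, 2 + length);
    }
    length += Character.toChars(codePoint, buffer, length); // writes 2 chars, returns 2
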
@@ -1,7 +1,5 @@
 package org.apache.lucene.analysis;
 
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -19,6 +17,8 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
 /**
  * A filter that replaces accented characters in the ISO Latin 1 character set
  * (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
@@ -35,25 +35,24 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 public final class ISOLatin1AccentFilter extends TokenFilter {
   public ISOLatin1AccentFilter(TokenStream input) {
     super(input);
-    termAtt = addAttribute(TermAttribute.class);
   }
 
   private char[] output = new char[256];
   private int outputPos;
-  private TermAttribute termAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   @Override
   public final boolean incrementToken() throws java.io.IOException {
     if (input.incrementToken()) {
-      final char[] buffer = termAtt.termBuffer();
-      final int length = termAtt.termLength();
+      final char[] buffer = termAtt.buffer();
+      final int length = termAtt.length();
       // If no characters actually require rewriting then we
       // just return token as-is:
       for(int i=0;i<length;i++) {
        final char c = buffer[i];
        if (c >= '\u00c0' && c <= '\uFB06') {
          removeAccents(buffer, length);
-          termAtt.setTermBuffer(output, 0, outputPos);
+          termAtt.copyBuffer(output, 0, outputPos);
          break;
        }
      }

@@ -17,36 +17,18 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
-import java.io.IOException;
 import java.io.Reader;
 
 /**
  * "Tokenizes" the entire stream as a single token. This is useful
  * for data like zip codes, ids, and some product names.
 */
-public class KeywordAnalyzer extends Analyzer {
+public final class KeywordAnalyzer extends ReusableAnalyzerBase {
   public KeywordAnalyzer() {
   }
+
   @Override
-  public TokenStream tokenStream(String fieldName,
-                                 final Reader reader) {
-    return new KeywordTokenizer(reader);
-  }
-  @Override
-  public TokenStream reusableTokenStream(String fieldName,
-                                         final Reader reader) throws IOException {
-    if (overridesTokenStreamMethod) {
-      // LUCENE-1678: force fallback to tokenStream() if we
-      // have been subclassed and that subclass overrides
-      // tokenStream but not reusableTokenStream
-      return tokenStream(fieldName, reader);
-    }
-    Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
-    if (tokenizer == null) {
-      tokenizer = new KeywordTokenizer(reader);
-      setPreviousTokenStream(tokenizer);
-    } else
-      tokenizer.reset(reader);
-    return tokenizer;
+  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
+    return new TokenStreamComponents(new KeywordTokenizer(reader));
   }
 }

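KeywordAnalyzer shows the analyzer-level half of the change: instead of overriding tokenStream() and hand-rolling reuse (and the LUCENE-1678 fallback) in reusableTokenStream(), an analyzer extends ReusableAnalyzerBase and declares its chain once in createComponents(); the base class handles caching and reset. A hedged sketch for a hypothetical analyzer (class name is illustrative; WhitespaceTokenizer and LowerCaseFilter are the real classes touched elsewhere in this commit):

    import java.io.Reader;
    import org.apache.lucene.analysis.*;
    import org.apache.lucene.util.Version;

    public final class ExampleLowerCaseAnalyzer extends ReusableAnalyzerBase {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // declare the chain once; reuse across calls is inherited from the base class
        Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
        TokenStream sink = new LowerCaseFilter(Version.LUCENE_CURRENT, source);
        return new TokenStreamComponents(source, sink);
      }
    }
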
@@ -21,7 +21,7 @@ import java.io.IOException;
 import java.util.Set;
 
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.Version;
 
 /**
@@ -33,8 +33,8 @@ import org.apache.lucene.util.Version;
  */
 public final class KeywordMarkerTokenFilter extends TokenFilter {
 
-  private final KeywordAttribute keywordAttr;
-  private final TermAttribute termAtt;
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final CharArraySet keywordSet;
 
   /**
@@ -50,8 +50,6 @@ public final class KeywordMarkerTokenFilter extends TokenFilter {
   public KeywordMarkerTokenFilter(final TokenStream in,
       final CharArraySet keywordSet) {
     super(in);
-    termAtt = addAttribute(TermAttribute.class);
-    keywordAttr = addAttribute(KeywordAttribute.class);
     this.keywordSet = keywordSet;
   }
 
@@ -73,8 +71,8 @@ public final class KeywordMarkerTokenFilter extends TokenFilter {
   @Override
   public final boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      keywordAttr.setKeyword(keywordSet.contains(termAtt.termBuffer(), 0,
-          termAtt.termLength()));
+      keywordAttr.setKeyword(keywordSet.contains(termAtt.buffer(), 0,
+          termAtt.length()));
       return true;
     } else
       return false;

@@ -21,7 +21,7 @@ import java.io.IOException;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.AttributeSource;
 
 /**
@@ -31,10 +31,10 @@ public final class KeywordTokenizer extends Tokenizer {
 
   private static final int DEFAULT_BUFFER_SIZE = 256;
 
-  private boolean done;
+  private boolean done = false;
   private int finalOffset;
-  private TermAttribute termAtt;
-  private OffsetAttribute offsetAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
   public KeywordTokenizer(Reader input) {
     this(input, DEFAULT_BUFFER_SIZE);
@@ -42,24 +42,17 @@ public final class KeywordTokenizer extends Tokenizer {
 
   public KeywordTokenizer(Reader input, int bufferSize) {
     super(input);
-    init(bufferSize);
+    termAtt.resizeBuffer(bufferSize);
   }
 
   public KeywordTokenizer(AttributeSource source, Reader input, int bufferSize) {
     super(source, input);
-    init(bufferSize);
+    termAtt.resizeBuffer(bufferSize);
   }
 
   public KeywordTokenizer(AttributeFactory factory, Reader input, int bufferSize) {
     super(factory, input);
-    init(bufferSize);
-  }
-
-  private void init(int bufferSize) {
-    this.done = false;
-    termAtt = addAttribute(TermAttribute.class);
-    offsetAtt = addAttribute(OffsetAttribute.class);
-    termAtt.resizeTermBuffer(bufferSize);
+    termAtt.resizeBuffer(bufferSize);
   }
 
   @Override
@@ -68,15 +61,15 @@ public final class KeywordTokenizer extends Tokenizer {
       clearAttributes();
       done = true;
       int upto = 0;
-      char[] buffer = termAtt.termBuffer();
+      char[] buffer = termAtt.buffer();
       while (true) {
         final int length = input.read(buffer, upto, buffer.length-upto);
         if (length == -1) break;
         upto += length;
         if (upto == buffer.length)
-          buffer = termAtt.resizeTermBuffer(1+buffer.length);
+          buffer = termAtt.resizeBuffer(1+buffer.length);
       }
-      termAtt.setTermLength(upto);
+      termAtt.setLength(upto);
       finalOffset = correctOffset(upto);
       offsetAtt.setOffset(correctOffset(0), finalOffset);
       return true;

@@ -19,17 +19,17 @@ package org.apache.lucene.analysis;
 
 import java.io.IOException;
 
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * Removes words that are too long or too short from the stream.
  */
 public final class LengthFilter extends TokenFilter {
 
-  final int min;
-  final int max;
+  private final int min;
+  private final int max;
 
-  private TermAttribute termAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   /**
    * Build a filter that removes words that are too long or too
@@ -40,7 +40,6 @@ public final class LengthFilter extends TokenFilter {
     super(in);
     this.min = min;
     this.max = max;
-    termAtt = addAttribute(TermAttribute.class);
   }
 
   /**
@@ -50,7 +49,7 @@ public final class LengthFilter extends TokenFilter {
   public final boolean incrementToken() throws IOException {
     // return the first non-stop word found
     while (input.incrementToken()) {
-      int len = termAtt.termLength();
+      int len = termAtt.length();
       if (len >= min && len <= max) {
         return true;
       }

@@ -19,7 +19,7 @@ package org.apache.lucene.analysis;
 
 import java.io.IOException;
 
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.CharacterUtils;
 import org.apache.lucene.util.Version;
 
@@ -34,7 +34,8 @@ import org.apache.lucene.util.Version;
  */
 public final class LowerCaseFilter extends TokenFilter {
   private final CharacterUtils charUtils;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   /**
    * Create a new LowerCaseFilter, that normalizes token text to lower case.
   *
@@ -43,7 +44,6 @@ public final class LowerCaseFilter extends TokenFilter {
   */
  public LowerCaseFilter(Version matchVersion, TokenStream in) {
    super(in);
-    termAtt = addAttribute(TermAttribute.class);
    charUtils = CharacterUtils.getInstance(matchVersion);
  }
 
@@ -55,13 +55,11 @@ public final class LowerCaseFilter extends TokenFilter {
     this(Version.LUCENE_30, in);
   }
 
-  private TermAttribute termAtt;
-
   @Override
   public final boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      final char[] buffer = termAtt.termBuffer();
-      final int length = termAtt.termLength();
+      final char[] buffer = termAtt.buffer();
+      final int length = termAtt.length();
       for (int i = 0; i < length;) {
         i += Character.toChars(
                 Character.toLowerCase(

@@ -26,7 +26,6 @@ import org.apache.lucene.document.NumericField; // for javadocs
 import org.apache.lucene.search.NumericRangeQuery; // for javadocs
 import org.apache.lucene.search.NumericRangeFilter; // for javadocs
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -118,11 +117,14 @@ public final class NumericTokenStream extends TokenStream {
       this.delegate = delegate;
     }
 
-    @Override
+    @Override @SuppressWarnings("deprecation")
     public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
       if (attClass == NumericTermAttribute.class)
         return new NumericTermAttributeImpl(ts);
-      if (attClass.isAssignableFrom(CharTermAttribute.class) || attClass.isAssignableFrom(TermAttribute.class))
+      if (attClass.isAssignableFrom(CharTermAttribute.class) ||
+        // TODO: remove in 4.0 (deprecated class, also remove the suppress above):
+        attClass.isAssignableFrom(org.apache.lucene.analysis.tokenattributes.TermAttribute.class)
+      )
         throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute/TermAttribute.");
       return delegate.createAttributeInstance(attClass);
     }

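NumericTokenStream is the one stream that must not expose a character term: its attribute factory above throws rather than create a CharTermAttribute (or the deprecated TermAttribute). A sketch of the resulting caller-side behavior, assuming the factory shown above:

    NumericTokenStream nts = new NumericTokenStream().setIntValue(42);
    try {
      nts.addAttribute(CharTermAttribute.class);   // rejected by createAttributeInstance(...)
    } catch (IllegalArgumentException expected) {
      // numeric terms are consumed via TermToBytesRefAttribute / NumericTermAttribute instead
    }
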
@@ -44,7 +44,7 @@ import java.util.HashMap;
 * <p>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
 * and query parsing.
 */
-public class PerFieldAnalyzerWrapper extends Analyzer {
+public final class PerFieldAnalyzerWrapper extends Analyzer {
   private Analyzer defaultAnalyzer;
   private Map<String,Analyzer> analyzerMap = new HashMap<String,Analyzer>();
 
@@ -99,12 +99,6 @@ public class PerFieldAnalyzerWrapper extends Analyzer {
 
   @Override
   public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    if (overridesTokenStreamMethod) {
-      // LUCENE-1678: force fallback to tokenStream() if we
-      // have been subclassed and that subclass overrides
-      // tokenStream but not reusableTokenStream
-      return tokenStream(fieldName, reader);
-    }
     Analyzer analyzer = analyzerMap.get(fieldName);
     if (analyzer == null)
       analyzer = defaultAnalyzer;

@@ -20,7 +20,7 @@ package org.apache.lucene.analysis;
 import java.io.IOException;
 
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /** Transforms the token stream as per the Porter stemming algorithm.
    Note: the input to the stemming filter must already be in lower case,
@@ -47,15 +47,12 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
   </p>
 */
 public final class PorterStemFilter extends TokenFilter {
-  private final PorterStemmer stemmer;
-  private final TermAttribute termAtt;
-  private final KeywordAttribute keywordAttr;
+  private final PorterStemmer stemmer = new PorterStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
   public PorterStemFilter(TokenStream in) {
     super(in);
-    stemmer = new PorterStemmer();
-    termAtt = addAttribute(TermAttribute.class);
-    keywordAttr = addAttribute(KeywordAttribute.class);
   }
 
   @Override
@@ -63,8 +60,8 @@ public final class PorterStemFilter extends TokenFilter {
     if (!input.incrementToken())
       return false;
 
-    if ((!keywordAttr.isKeyword()) && stemmer.stem(termAtt.termBuffer(), 0, termAtt.termLength()))
-      termAtt.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
+    if ((!keywordAttr.isKeyword()) && stemmer.stem(termAtt.buffer(), 0, termAtt.length()))
+      termAtt.copyBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
     return true;
   }
 }

@@ -100,8 +100,8 @@ public abstract class ReusableAnalyzerBase extends Analyzer {
   * {@link Analyzer#reusableTokenStream(String, Reader)}.
   */
  public static class TokenStreamComponents {
-    final Tokenizer source;
-    final TokenStream sink;
+    protected final Tokenizer source;
+    protected final TokenStream sink;
 
    /**
     * Creates a new {@link TokenStreamComponents} instance.

@@ -23,7 +23,7 @@ import java.util.Set;
 import java.util.List;
 
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.queryParser.QueryParser; // for javadoc
 import org.apache.lucene.util.Version;
 
@@ -44,8 +44,8 @@ public final class StopFilter extends TokenFilter {
   private final CharArraySet stopWords;
   private boolean enablePositionIncrements = false;
 
-  private TermAttribute termAtt;
-  private PositionIncrementAttribute posIncrAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
 
   /**
    * Construct a token stream filtering the given input.
@@ -104,8 +104,6 @@ public final class StopFilter extends TokenFilter {
     super(input);
     this.stopWords = stopWords instanceof CharArraySet ? (CharArraySet)stopWords : new CharArraySet(matchVersion, stopWords, ignoreCase);
     this.enablePositionIncrements = enablePositionIncrements;
-    termAtt = addAttribute(TermAttribute.class);
-    posIncrAtt = addAttribute(PositionIncrementAttribute.class);
   }
 
   /**
@@ -257,7 +255,7 @@ public final class StopFilter extends TokenFilter {
     // return the first non-stop word found
     int skippedPositions = 0;
     while (input.incrementToken()) {
-      if (!stopWords.contains(termAtt.termBuffer(), 0, termAtt.termLength())) {
+      if (!stopWords.contains(termAtt.buffer(), 0, termAtt.length())) {
         if (enablePositionIncrements) {
           posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
         }

@@ -42,8 +42,12 @@ import java.util.Set;
 * are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
 * </ul>
 */
-public class StandardAnalyzer extends Analyzer {
-  private Set<?> stopSet;
+public final class StandardAnalyzer extends StopwordAnalyzerBase {
+
+  /** Default maximum allowed token length */
+  public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
 
   /**
    * Specifies whether deprecated acronyms should be replaced with HOST type.
@@ -54,7 +58,15 @@ public class StandardAnalyzer extends Analyzer {
   /** An unmodifiable set containing some common English words that are usually not
   useful for searching. */
   public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
-  private final Version matchVersion;
+
+  /** Builds an analyzer with the given stop words.
+   * @param matchVersion Lucene version to match See {@link
+   * <a href="#version">above</a>}
+   * @param stopWords stop words */
+  public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
+    super(matchVersion, stopWords);
+    replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
+  }
 
   /** Builds an analyzer with the default stop words ({@link
    * #STOP_WORDS_SET}).
@@ -65,16 +77,6 @@ public class StandardAnalyzer extends Analyzer {
     this(matchVersion, STOP_WORDS_SET);
   }
 
-  /** Builds an analyzer with the given stop words.
-   * @param matchVersion Lucene version to match See {@link
-   * <a href="#version">above</a>}
-   * @param stopWords stop words */
-  public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
-    stopSet = stopWords;
-    replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
-    this.matchVersion = matchVersion;
-  }
-
   /** Builds an analyzer with the stop words from the given file.
    * @see WordlistLoader#getWordSet(File)
    * @param matchVersion Lucene version to match See {@link
@@ -93,28 +95,6 @@ public class StandardAnalyzer extends Analyzer {
     this(matchVersion, WordlistLoader.getWordSet(stopwords));
   }
 
-  /** Constructs a {@link StandardTokenizer} filtered by a {@link
-  StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
-  @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
-    tokenStream.setMaxTokenLength(maxTokenLength);
-    TokenStream result = new StandardFilter(tokenStream);
-    result = new LowerCaseFilter(matchVersion, result);
-    result = new StopFilter(matchVersion, result, stopSet);
-    return result;
-  }
-
-  private static final class SavedStreams {
-    StandardTokenizer tokenStream;
-    TokenStream filteredTokenStream;
-  }
-
-  /** Default maximum allowed token length */
-  public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
-  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
   /**
    * Set maximum allowed token length.  If a token is seen
    * that exceeds this length then it is discarded.  This
@@ -133,29 +113,19 @@ public class StandardAnalyzer extends Analyzer {
   }
 
   @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    if (overridesTokenStreamMethod) {
-      // LUCENE-1678: force fallback to tokenStream() if we
-      // have been subclassed and that subclass overrides
-      // tokenStream but not reusableTokenStream
-      return tokenStream(fieldName, reader);
-    }
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      setPreviousTokenStream(streams);
-      streams.tokenStream = new StandardTokenizer(matchVersion, reader);
-      streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
-      streams.filteredTokenStream = new LowerCaseFilter(matchVersion,
-          streams.filteredTokenStream);
-      streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopSet);
-    } else {
-      streams.tokenStream.reset(reader);
-    }
-    streams.tokenStream.setMaxTokenLength(maxTokenLength);
-
-    streams.tokenStream.setReplaceInvalidAcronym(replaceInvalidAcronym);
-
-    return streams.filteredTokenStream;
+  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
+    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
+    src.setMaxTokenLength(maxTokenLength);
+    src.setReplaceInvalidAcronym(replaceInvalidAcronym);
+    TokenStream tok = new StandardFilter(src);
+    tok = new LowerCaseFilter(matchVersion, tok);
+    tok = new StopFilter(matchVersion, tok, stopwords);
+    return new TokenStreamComponents(src, tok) {
+      @Override
+      protected boolean reset(final Reader reader) throws IOException {
+        src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
+        return super.reset(reader);
+      }
+    };
   }
 }

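The anonymous TokenStreamComponents subclass above is the one wrinkle: maxTokenLength is mutable on the analyzer, so the cached tokenizer must be re-configured every time the components are reset against a new reader. From the caller's side, reuse is now entirely inherited behavior; roughly (a sketch, not part of this commit):

    Analyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT);
    TokenStream ts1 = a.reusableTokenStream("f", new StringReader("first text"));
    TokenStream ts2 = a.reusableTokenStream("f", new StringReader("second text"));
    // ts1 and ts2 are the same cached chain; reset(reader) above re-applied maxTokenLength
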
@@ -19,27 +19,24 @@ package org.apache.lucene.analysis.standard;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 /** Normalizes tokens extracted with {@link StandardTokenizer}. */
 
 public final class StandardFilter extends TokenFilter {
 
-
   /** Construct filtering <i>in</i>. */
   public StandardFilter(TokenStream in) {
     super(in);
-    termAtt = addAttribute(TermAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);
   }
 
   private static final String APOSTROPHE_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.APOSTROPHE];
   private static final String ACRONYM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ACRONYM];
 
   // this filters uses attribute type
-  private final TypeAttribute typeAtt;
-  private final TermAttribute termAtt;
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   /** Returns the next token in the stream, or null at EOS.
    * <p>Removes <tt>'s</tt> from the end of words.
@@ -51,16 +48,16 @@ public final class StandardFilter extends TokenFilter {
       return false;
     }
 
-    char[] buffer = termAtt.termBuffer();
-    final int bufferLength = termAtt.termLength();
+    final char[] buffer = termAtt.buffer();
+    final int bufferLength = termAtt.length();
     final String type = typeAtt.type();
 
     if (type == APOSTROPHE_TYPE &&      // remove 's
-        bufferLength >= 2 &&
+        bufferLength >= 2 &&
         buffer[bufferLength-2] == '\'' &&
         (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
       // Strip last 2 characters off
-      termAtt.setTermLength(bufferLength - 2);
+      termAtt.setLength(bufferLength - 2);
     } else if (type == ACRONYM_TYPE) {      // remove dots
       int upto = 0;
       for(int i=0;i<bufferLength;i++) {
@@ -68,7 +65,7 @@ public final class StandardFilter extends TokenFilter {
         if (c != '.')
           buffer[upto++] = c;
       }
-      termAtt.setTermLength(upto);
+      termAtt.setLength(upto);
     }
 
     return true;

@@ -20,7 +20,7 @@ package org.apache.lucene.collation;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.IndexableBinaryStringTools;
 
 import java.io.IOException;
@@ -73,8 +73,8 @@ import java.text.Collator;
 * </p>
 */
 public final class CollationKeyFilter extends TokenFilter {
-  private Collator collator = null;
-  private TermAttribute termAtt;
+  private final Collator collator;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
  /**
   * @param input Source token stream
@@ -83,23 +83,18 @@ public final class CollationKeyFilter extends TokenFilter {
   public CollationKeyFilter(TokenStream input, Collator collator) {
     super(input);
     this.collator = collator;
-    termAtt = addAttribute(TermAttribute.class);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      char[] termBuffer = termAtt.termBuffer();
-      String termText = new String(termBuffer, 0, termAtt.termLength());
-      byte[] collationKey = collator.getCollationKey(termText).toByteArray();
+      byte[] collationKey = collator.getCollationKey(termAtt.toString()).toByteArray();
       int encodedLength = IndexableBinaryStringTools.getEncodedLength(
          collationKey, 0, collationKey.length);
-      if (encodedLength > termBuffer.length) {
-        termAtt.resizeTermBuffer(encodedLength);
-      }
-      termAtt.setTermLength(encodedLength);
+      termAtt.resizeBuffer(encodedLength);
+      termAtt.setLength(encodedLength);
       IndexableBinaryStringTools.encode(collationKey, 0, collationKey.length,
-          termAtt.termBuffer(), 0, encodedLength);
+          termAtt.buffer(), 0, encodedLength);
       return true;
     } else {
       return false;

@@ -17,7 +17,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.DateField;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.index.Term;
@@ -522,7 +522,7 @@ public class QueryParser implements QueryParserConstants {
         source = analyzer.tokenStream(field, new StringReader(queryText));
       }
       CachingTokenFilter buffer = new CachingTokenFilter(source);
-      TermAttribute termAtt = null;
+      CharTermAttribute termAtt = null;
       PositionIncrementAttribute posIncrAtt = null;
       int numTokens = 0;
 
@@ -534,8 +534,8 @@ public class QueryParser implements QueryParserConstants {
         // success==false if we hit an exception
       }
       if (success) {
-        if (buffer.hasAttribute(TermAttribute.class)) {
-          termAtt = buffer.getAttribute(TermAttribute.class);
+        if (buffer.hasAttribute(CharTermAttribute.class)) {
+          termAtt = buffer.getAttribute(CharTermAttribute.class);
         }
         if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
           posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
@@ -581,7 +581,7 @@ public class QueryParser implements QueryParserConstants {
             try {
               boolean hasNext = buffer.incrementToken();
               assert hasNext == true;
-              term = termAtt.term();
+              term = termAtt.toString();
             } catch (IOException e) {
               // safe to ignore, because we know the number of tokens
             }
@@ -596,7 +596,7 @@ public class QueryParser implements QueryParserConstants {
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
-              term = termAtt.term();
+              term = termAtt.toString();
            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }
@@ -619,7 +619,7 @@ public class QueryParser implements QueryParserConstants {
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
-              term = termAtt.term();
+              term = termAtt.toString();
              if (posIncrAtt != null) {
                positionIncrement = posIncrAtt.getPositionIncrement();
              }
@@ -659,7 +659,7 @@ public class QueryParser implements QueryParserConstants {
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
-              term = termAtt.term();
+              term = termAtt.toString();
              if (posIncrAtt != null) {
                positionIncrement = posIncrAtt.getPositionIncrement();
              }

@@ -41,7 +41,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.DateField;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.index.Term;
@@ -546,7 +546,7 @@ public class QueryParser {
         source = analyzer.tokenStream(field, new StringReader(queryText));
       }
       CachingTokenFilter buffer = new CachingTokenFilter(source);
-      TermAttribute termAtt = null;
+      CharTermAttribute termAtt = null;
       PositionIncrementAttribute posIncrAtt = null;
       int numTokens = 0;
 
@@ -558,8 +558,8 @@ public class QueryParser {
         // success==false if we hit an exception
       }
       if (success) {
-        if (buffer.hasAttribute(TermAttribute.class)) {
-          termAtt = buffer.getAttribute(TermAttribute.class);
+        if (buffer.hasAttribute(CharTermAttribute.class)) {
+          termAtt = buffer.getAttribute(CharTermAttribute.class);
         }
         if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
           posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
@@ -605,7 +605,7 @@ public class QueryParser {
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
-              term = termAtt.term();
+              term = termAtt.toString();
            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }
@@ -620,7 +620,7 @@ public class QueryParser {
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
-              term = termAtt.term();
+              term = termAtt.toString();
            } catch (IOException e) {
              // safe to ignore, because we know the number of tokens
            }
@@ -643,7 +643,7 @@ public class QueryParser {
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
-              term = termAtt.term();
+              term = termAtt.toString();
              if (posIncrAtt != null) {
                positionIncrement = posIncrAtt.getPositionIncrement();
              }
@@ -683,7 +683,7 @@ public class QueryParser {
            try {
              boolean hasNext = buffer.incrementToken();
              assert hasNext == true;
-              term = termAtt.term();
+              term = termAtt.toString();
              if (posIncrAtt != null) {
                positionIncrement = posIncrAtt.getPositionIncrement();
              }

@@ -15,7 +15,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.DateField;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.index.Term;

@@ -28,7 +28,7 @@ import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.TermFreqVector;
 
 /**
@@ -61,11 +61,11 @@ public class QueryTermVector implements TermFreqVector {
         boolean hasMoreTokens = false;
 
         stream.reset();
-        TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
+        final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
 
         hasMoreTokens = stream.incrementToken();
         while (hasMoreTokens) {
-          terms.add(termAtt.term());
+          terms.add(termAtt.toString());
          hasMoreTokens = stream.incrementToken();
        }
        processTerms(terms.toArray(new String[terms.size()]));

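The consumer-side rule visible in this hunk and in the QueryParser hunks: term() becomes toString(), and callers that want to avoid the String copy read buffer()/length() directly. Both styles, in one hedged sketch (imports as in the hunk above):

    static void consume(TokenStream stream) throws IOException {
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        String copy = termAtt.toString();   // allocating copy (was termAtt.term())
        char[] chars = termAtt.buffer();    // direct access   (was termAtt.termBuffer())
        int len = termAtt.length();         //                 (was termAtt.termLength())
        // CharTermAttribute also implements CharSequence, so it can be inspected
        // without copying, e.g. termAtt.charAt(0) or termAtt.subSequence(0, len).
      }
    }
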
@@ -83,8 +83,8 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
     assertNotNull(output);
     CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
 
-    assertTrue("has no TermAttribute", ts.hasAttribute(TermAttribute.class));
-    TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
+    assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
+    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
 
     OffsetAttribute offsetAtt = null;
     if (startOffsets != null || endOffsets != null || finalOffset != null) {
@@ -108,7 +108,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
     for (int i = 0; i < output.length; i++) {
       // extra safety to enforce, that the state is not preserved and also assign bogus values
       ts.clearAttributes();
-      termAtt.setTermBuffer("bogusTerm");
+      termAtt.setEmpty().append("bogusTerm");
       if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
       if (typeAtt != null) typeAtt.setType("bogusType");
       if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
@@ -117,7 +117,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       assertTrue("token "+i+" does not exist", ts.incrementToken());
       assertTrue("clearAttributes() was not called correctly in TokenStream chain", checkClearAtt.getAndResetClearCalled());
 
-      assertEquals("term "+i, output[i], termAtt.term());
+      assertEquals("term "+i, output[i], termAtt.toString());
      if (startOffsets != null)
        assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset());
      if (endOffsets != null)

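setEmpty() returns the attribute itself and CharTermAttribute implements Appendable, which is why the test writes termAtt.setEmpty().append("bogusTerm") where it used to call setTermBuffer(String). The replacement idiom, sketched:

    termAtt.setEmpty().append("replacement");        // was termAtt.setTermBuffer("replacement")
    termAtt.setEmpty().append("abc").append("def");  // append chains; any CharSequence works
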
@@ -17,7 +17,7 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import java.io.StringReader;
 import java.util.List;
 import java.util.ArrayList;
@@ -33,7 +33,7 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase {
      +" ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl"));
     ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
 
-    TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
 
     assertTermEquals("Des", filter, termAtt);
     assertTermEquals("mot", filter, termAtt);
@@ -1890,7 +1890,7 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase {
 
     TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(inputText.toString()));
     ASCIIFoldingFilter filter = new ASCIIFoldingFilter(stream);
-    TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
     Iterator<String> expectedIter = expectedOutputTokens.iterator();
     while (expectedIter.hasNext()) {
       assertTermEquals(expectedIter.next(), filter, termAtt);
@@ -1898,8 +1898,8 @@ public class TestASCIIFoldingFilter extends BaseTokenStreamTestCase {
     assertFalse(filter.incrementToken());
   }
 
-  void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt) throws Exception {
+  void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt) throws Exception {
     assertTrue(stream.incrementToken());
-    assertEquals(expected, termAtt.term());
+    assertEquals(expected, termAtt.toString());
   }
 }

@@ -24,7 +24,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.Payload;
 
 public class TestAnalyzers extends BaseTokenStreamTestCase {
@@ -120,26 +120,6 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
     String[] y = StandardTokenizer.TOKEN_TYPES;
   }
 
-  private static class MyStandardAnalyzer extends StandardAnalyzer {
-    public MyStandardAnalyzer() {
-      super(TEST_VERSION_CURRENT);
-    }
-
-    @Override
-    public TokenStream tokenStream(String field, Reader reader) {
-      return new WhitespaceAnalyzer(TEST_VERSION_CURRENT).tokenStream(field, reader);
-    }
-  }
-
-  public void testSubclassOverridingOnlyTokenStream() throws Throwable {
-    Analyzer a = new MyStandardAnalyzer();
-    TokenStream ts = a.reusableTokenStream("field", new StringReader("the"));
-    // StandardAnalyzer will discard "the" (it's a
-    // stopword), by my subclass will not:
-    assertTrue(ts.incrementToken());
-    assertFalse(ts.incrementToken());
-  }
-
   private static class LowerCaseWhitespaceAnalyzer extends Analyzer {
 
     @Override
@@ -202,8 +182,8 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
     String highSurEndingLower = "bogustermboguster\ud801";
     tokenizer.reset(new StringReader(highSurEndingUpper));
     assertTokenStreamContents(filter, new String[] {highSurEndingLower});
-    assertTrue(filter.hasAttribute(TermAttribute.class));
-    char[] termBuffer = filter.getAttribute(TermAttribute.class).termBuffer();
+    assertTrue(filter.hasAttribute(CharTermAttribute.class));
+    char[] termBuffer = filter.getAttribute(CharTermAttribute.class).buffer();
     int length = highSurEndingLower.length();
     assertEquals('\ud801', termBuffer[length - 1]);
     assertEquals('\udc3e', termBuffer[length]);

@@ -21,7 +21,7 @@ package org.apache.lucene.analysis;
 import java.io.IOException;
 
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.TermVector;
@@ -43,7 +43,7 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase {
     Document doc = new Document();
     TokenStream stream = new TokenStream() {
       private int index = 0;
-      private TermAttribute termAtt = addAttribute(TermAttribute.class);
+      private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
       private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
       @Override
@@ -52,7 +52,7 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase {
           return false;
         } else {
           clearAttributes();
-          termAtt.setTermBuffer(tokens[index++]);
+          termAtt.append(tokens[index++]);
           offsetAtt.setOffset(0,0);
           return true;
         }
@@ -100,11 +100,10 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase {
   private void checkTokens(TokenStream stream) throws IOException {
     int count = 0;
 
-    TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
-    assertNotNull(termAtt);
+    CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     while (stream.incrementToken()) {
       assertTrue(count < tokens.length);
-      assertEquals(tokens[count], termAtt.term());
+      assertEquals(tokens[count], termAtt.toString());
       count++;
     }
 

@@ -17,14 +17,14 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import java.io.StringReader;
 
 public class TestISOLatin1AccentFilter extends BaseTokenStreamTestCase {
   public void testU() throws Exception {
     TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl"));
     ISOLatin1AccentFilter filter = new ISOLatin1AccentFilter(stream);
-    TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
     assertTermEquals("Des", filter, termAtt);
     assertTermEquals("mot", filter, termAtt);
     assertTermEquals("cles", filter, termAtt);
@@ -103,8 +103,8 @@ public class TestISOLatin1AccentFilter extends BaseTokenStreamTestCase {
     assertFalse(filter.incrementToken());
   }
 
-  void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt) throws Exception {
+  void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt) throws Exception {
     assertTrue(stream.incrementToken());
-    assertEquals(expected, termAtt.term());
+    assertEquals(expected, termAtt.toString());
   }
 }

@@ -6,7 +6,7 @@ import java.util.HashSet;
 import java.util.Set;
 
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.junit.Test;
 
 /**
@@ -53,20 +53,20 @@ public class TestKeywordMarkerTokenFilter extends BaseTokenStreamTestCase {
 
   public static class LowerCaseFilterMock extends TokenFilter {
 
-    private TermAttribute termAtt;
-    private KeywordAttribute keywordAttr;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 
     public LowerCaseFilterMock(TokenStream in) {
       super(in);
-      termAtt = addAttribute(TermAttribute.class);
-      keywordAttr = addAttribute(KeywordAttribute.class);
     }
 
     @Override
     public boolean incrementToken() throws IOException {
       if (input.incrementToken()) {
-        if (!keywordAttr.isKeyword())
-          termAtt.setTermBuffer(termAtt.term().toLowerCase());
+        if (!keywordAttr.isKeyword()) {
+          final String term = termAtt.toString().toLowerCase();
+          termAtt.setEmpty().append(term);
+        }
         return true;
       }
       return false;

@@ -17,7 +17,7 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import java.io.StringReader;
 
 public class TestLengthFilter extends BaseTokenStreamTestCase {
@@ -26,14 +26,14 @@ public class TestLengthFilter extends BaseTokenStreamTestCase {
     TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
        new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
     LengthFilter filter = new LengthFilter(stream, 2, 6);
-    TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
 
     assertTrue(filter.incrementToken());
-    assertEquals("short", termAtt.term());
+    assertEquals("short", termAtt.toString());
     assertTrue(filter.incrementToken());
-    assertEquals("ab", termAtt.term());
+    assertEquals("ab", termAtt.toString());
     assertTrue(filter.incrementToken());
-    assertEquals("foo", termAtt.term());
+    assertEquals("foo", termAtt.toString());
     assertFalse(filter.incrementToken());
   }
 

@@ -2,7 +2,7 @@ package org.apache.lucene.analysis;
 
 import java.io.StringReader;
 
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -30,19 +30,19 @@ public class TestPerFieldAnalzyerWrapper extends BaseTokenStreamTestCase {
 
     TokenStream tokenStream = analyzer.tokenStream("field",
        new StringReader(text));
-    TermAttribute termAtt = tokenStream.getAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
 
     assertTrue(tokenStream.incrementToken());
     assertEquals("WhitespaceAnalyzer does not lowercase",
                 "Qwerty",
-                 termAtt.term());
+                 termAtt.toString());
 
     tokenStream = analyzer.tokenStream("special",
        new StringReader(text));
-    termAtt = tokenStream.getAttribute(TermAttribute.class);
+    termAtt = tokenStream.getAttribute(CharTermAttribute.class);
     assertTrue(tokenStream.incrementToken());
     assertEquals("SimpleAnalyzer lowercases",
                 "qwerty",
-                 termAtt.term());
+                 termAtt.toString());
   }
 }

@@ -18,7 +18,7 @@ package org.apache.lucene.analysis;
  */

 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.Version;

 import java.io.StringReader;
@@ -51,10 +51,10 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
     StringReader reader = new StringReader("This is a test of the english stop analyzer");
     TokenStream stream = stop.tokenStream("test", reader);
     assertTrue(stream != null);
-    TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);

     while (stream.incrementToken()) {
-      assertFalse(inValidTokens.contains(termAtt.term()));
+      assertFalse(inValidTokens.contains(termAtt.toString()));
     }
   }

@@ -67,11 +67,11 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
     StringReader reader = new StringReader("This is a good test of the english stop analyzer");
     TokenStream stream = newStop.tokenStream("test", reader);
     assertNotNull(stream);
-    TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);

     while (stream.incrementToken()) {
-      String text = termAtt.term();
+      String text = termAtt.toString();
       assertFalse(stopWordsSet.contains(text));
       assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments.
     }
@@ -88,11 +88,11 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
     TokenStream stream = newStop.tokenStream("test", reader);
     assertNotNull(stream);
     int i = 0;
-    TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);

     while (stream.incrementToken()) {
-      String text = termAtt.term();
+      String text = termAtt.toString();
       assertFalse(stopWordsSet.contains(text));
       assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement());
     }

@@ -17,7 +17,7 @@ package org.apache.lucene.analysis;
  */

 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.English;
 import org.apache.lucene.util.Version;

@@ -37,11 +37,11 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
     StringReader reader = new StringReader("Now is The Time");
     Set<String> stopWords = new HashSet<String>(Arrays.asList("is", "the", "Time"));
     TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, false);
-    final TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
+    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     assertTrue(stream.incrementToken());
-    assertEquals("Now", termAtt.term());
+    assertEquals("Now", termAtt.toString());
     assertTrue(stream.incrementToken());
-    assertEquals("The", termAtt.term());
+    assertEquals("The", termAtt.toString());
     assertFalse(stream.incrementToken());
   }

@@ -49,9 +49,9 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
     StringReader reader = new StringReader("Now is The Time");
     Set<Object> stopWords = new HashSet<Object>(Arrays.asList( "is", "the", "Time" ));
     TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopWords, true);
-    final TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
+    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     assertTrue(stream.incrementToken());
-    assertEquals("Now", termAtt.term());
+    assertEquals("Now", termAtt.toString());
     assertFalse(stream.incrementToken());
   }

@@ -60,11 +60,11 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
     String[] stopWords = new String[] { "is", "the", "Time" };
     Set<Object> stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, stopWords);
     TokenStream stream = new StopFilter(TEST_VERSION_CURRENT, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), stopSet);
-    final TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
+    final CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     assertTrue(stream.incrementToken());
-    assertEquals("Now", termAtt.term());
+    assertEquals("Now", termAtt.toString());
     assertTrue(stream.incrementToken());
-    assertEquals("The", termAtt.term());
+    assertEquals("The", termAtt.toString());
     assertFalse(stream.incrementToken());
   }

@@ -117,13 +117,13 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
   private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws IOException {
     log("---> test with enable-increments-"+(enableIcrements?"enabled":"disabled"));
     stpf.setEnablePositionIncrements(enableIcrements);
-    TermAttribute termAtt = stpf.getAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
     for (int i=0; i<20; i+=3) {
       assertTrue(stpf.incrementToken());
       log("Token "+i+": "+stpf);
       String w = English.intToEnglish(i).trim();
-      assertEquals("expecting token "+i+" to be "+w,w,termAtt.term());
+      assertEquals("expecting token "+i+" to be "+w,w,termAtt.toString());
       assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,posIncrAtt.getPositionIncrement());
     }
     assertFalse(stpf.incrementToken());

@@ -19,7 +19,7 @@ package org.apache.lucene.analysis;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.English;
 import java.io.IOException;
@@ -59,16 +59,16 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
   static final TeeSinkTokenFilter.SinkFilter theFilter = new TeeSinkTokenFilter.SinkFilter() {
     @Override
     public boolean accept(AttributeSource a) {
-      TermAttribute termAtt = a.getAttribute(TermAttribute.class);
-      return termAtt.term().equalsIgnoreCase("The");
+      CharTermAttribute termAtt = a.getAttribute(CharTermAttribute.class);
+      return termAtt.toString().equalsIgnoreCase("The");
     }
   };

   static final TeeSinkTokenFilter.SinkFilter dogFilter = new TeeSinkTokenFilter.SinkFilter() {
     @Override
     public boolean accept(AttributeSource a) {
-      TermAttribute termAtt = a.getAttribute(TermAttribute.class);
-      return termAtt.term().equalsIgnoreCase("Dogs");
+      CharTermAttribute termAtt = a.getAttribute(CharTermAttribute.class);
+      return termAtt.toString().equalsIgnoreCase("Dogs");
     }
   };

@@ -135,8 +135,8 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
     TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100));
     teeStream.consumeAllTokens();
     TokenStream stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))), 100);
-    TermAttribute tfTok = stream.addAttribute(TermAttribute.class);
-    TermAttribute sinkTok = sink.addAttribute(TermAttribute.class);
+    CharTermAttribute tfTok = stream.addAttribute(CharTermAttribute.class);
+    CharTermAttribute sinkTok = sink.addAttribute(CharTermAttribute.class);
     for (int i=0; stream.incrementToken(); i++) {
       assertTrue(sink.incrementToken());
       assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.equals(sinkTok) == true);

@@ -244,8 +244,8 @@ public class TestToken extends LuceneTestCase {
     assertTrue("TypeAttribute is not implemented by SenselessAttributeImpl",
       ts.addAttribute(SenselessAttribute.class) instanceof SenselessAttributeImpl);

-    assertTrue("TermAttribute is not implemented by Token",
-      ts.addAttribute(TermAttribute.class) instanceof Token);
+    assertTrue("CharTermAttribute is not implemented by Token",
+      ts.addAttribute(CharTermAttribute.class) instanceof Token);
     assertTrue("OffsetAttribute is not implemented by Token",
       ts.addAttribute(OffsetAttribute.class) instanceof Token);
     assertTrue("FlagsAttribute is not implemented by Token",

@@ -29,7 +29,7 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;
@@ -152,15 +152,15 @@ public class TestDocumentWriter extends LuceneTestCase {
           restoreState(state);
           payloadAtt.setPayload(null);
           posIncrAtt.setPositionIncrement(0);
-          termAtt.setTermBuffer(new char[]{'b'}, 0, 1);
+          termAtt.setEmpty().append("b");
           state = null;
           return true;
         }

         boolean hasNext = input.incrementToken();
         if (!hasNext) return false;
-        if (Character.isDigit(termAtt.termBuffer()[0])) {
-          posIncrAtt.setPositionIncrement(termAtt.termBuffer()[0] - '0');
+        if (Character.isDigit(termAtt.buffer()[0])) {
+          posIncrAtt.setPositionIncrement(termAtt.buffer()[0] - '0');
         }
         if (first) {
           // set payload on first position only
@@ -174,7 +174,7 @@ public class TestDocumentWriter extends LuceneTestCase {

       }

-      TermAttribute termAtt = addAttribute(TermAttribute.class);
+      CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
       PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
       PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
     };
@@ -215,7 +215,7 @@ public class TestDocumentWriter extends LuceneTestCase {
     private String[] tokens = new String[] {"term1", "term2", "term3", "term2"};
     private int index = 0;

-    private TermAttribute termAtt = addAttribute(TermAttribute.class);
+    private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

     @Override
     public boolean incrementToken() throws IOException {
@@ -223,7 +223,7 @@ public class TestDocumentWriter extends LuceneTestCase {
         return false;
       } else {
         clearAttributes();
-        termAtt.setTermBuffer(tokens[index++]);
+        termAtt.setEmpty().append(tokens[index++]);
         return true;
       }
     }

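Note: the TestDocumentWriter hunks also cover the char[]-level accessors: termBuffer() becomes buffer(), termLength() becomes length(), and setTermBuffer(char[], int, int) becomes copyBuffer(char[], int, int). A small standalone sketch (instantiating CharTermAttributeImpl directly is for illustration only; inside a TokenStream one obtains the attribute via addAttribute as above):

    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;

    public class BufferAccessSketch {
      public static void main(String[] args) {
        CharTermAttribute termAtt = new CharTermAttributeImpl();
        termAtt.append("7a");

        char[] buf = termAtt.buffer();   // was termAtt.termBuffer()
        int len = termAtt.length();      // was termAtt.termLength()
        System.out.println(Character.isDigit(buf[0]) + " len=" + len); // true len=2

        termAtt.copyBuffer(new char[] {'b'}, 0, 1); // was setTermBuffer(char[], 0, 1)
        System.out.println(termAtt); // toString() yields the term: "b"
      }
    }
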
@@ -46,7 +46,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -3482,7 +3482,7 @@ public class TestIndexWriter extends LuceneTestCase {
   // LUCENE-1255
   public void testNegativePositions() throws Throwable {
     final TokenStream tokens = new TokenStream() {
-      final TermAttribute termAtt = addAttribute(TermAttribute.class);
+      final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
       final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);

       final Iterator<String> terms = Arrays.asList("a","b","c").iterator();
@@ -3492,7 +3492,7 @@ public class TestIndexWriter extends LuceneTestCase {
       public boolean incrementToken() {
         if (!terms.hasNext()) return false;
         clearAttributes();
-        termAtt.setTermBuffer( terms.next());
+        termAtt.append(terms.next());
         posIncrAtt.setPositionIncrement(first ? 0 : 1);
         first = false;
         return true;

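Note: two replacements for setTermBuffer(String) appear in this commit. After clearAttributes() the term is already empty, so a bare append(String) is enough (as in testNegativePositions above); a filter that rewrites an incoming token without clearing must call setEmpty() first. A hypothetical one-token stream, not part of this commit, making the distinction explicit:

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public final class OneTokenSketchStream extends TokenStream {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      private boolean done = false;

      @Override
      public boolean incrementToken() throws IOException {
        if (done) return false;
        done = true;
        clearAttributes();   // resets every attribute, leaving the term empty,
        termAtt.append("a"); // so a bare append() suffices here; without the
                             // clearAttributes() call this would have to be
                             // termAtt.setEmpty().append("a")
        return true;
      }
    }
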
@@ -33,7 +33,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -538,7 +538,7 @@ public class TestPayloads extends LuceneTestCase {
     private ByteArrayPool pool;
     private String term;

-    TermAttribute termAtt;
+    CharTermAttribute termAtt;
     PayloadAttribute payloadAtt;

     PoolingPayloadTokenStream(ByteArrayPool pool) {
@@ -548,7 +548,7 @@ public class TestPayloads extends LuceneTestCase {
       term = pool.bytesToString(payload);
       first = true;
       payloadAtt = addAttribute(PayloadAttribute.class);
-      termAtt = addAttribute(TermAttribute.class);
+      termAtt = addAttribute(CharTermAttribute.class);
     }

     @Override
@@ -556,7 +556,7 @@ public class TestPayloads extends LuceneTestCase {
       if (!first) return false;
       first = false;
       clearAttributes();
-      termAtt.setTermBuffer(term);
+      termAtt.append(term);
       payloadAtt.setPayload(new Payload(payload));
       return true;
     }

@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.store.MockRAMDirectory;
@@ -123,12 +123,12 @@ public class TestTermVectorsReader extends LuceneTestCase {
   private class MyTokenStream extends TokenStream {
     int tokenUpto;

-    TermAttribute termAtt;
+    CharTermAttribute termAtt;
     PositionIncrementAttribute posIncrAtt;
     OffsetAttribute offsetAtt;

     public MyTokenStream() {
-      termAtt = addAttribute(TermAttribute.class);
+      termAtt = addAttribute(CharTermAttribute.class);
       posIncrAtt = addAttribute(PositionIncrementAttribute.class);
       offsetAtt = addAttribute(OffsetAttribute.class);
     }
@@ -140,7 +140,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
       else {
         final TestToken testToken = tokens[tokenUpto++];
         clearAttributes();
-        termAtt.setTermBuffer(testToken.text);
+        termAtt.append(testToken.text);
         offsetAtt.setOffset(testToken.startOffset, testToken.endOffset);
         if (tokenUpto > 1) {
           posIncrAtt.setPositionIncrement(testToken.pos - tokens[tokenUpto-2].pos);

@@ -23,7 +23,7 @@ import java.util.Random;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -33,12 +33,12 @@ import org.apache.lucene.util.LuceneTestCase;

 class RepeatingTokenStream extends TokenStream {
   public int num;
-  TermAttribute termAtt;
+  CharTermAttribute termAtt;
   String value;

   public RepeatingTokenStream(String val) {
     this.value = val;
-    this.termAtt = addAttribute(TermAttribute.class);
+    this.termAtt = addAttribute(CharTermAttribute.class);
   }

   @Override
@@ -46,7 +46,7 @@ class RepeatingTokenStream extends TokenStream {
     num--;
     if (num >= 0) {
       clearAttributes();
-      termAtt.setTermBuffer(value);
+      termAtt.append(value);
       return true;
     }
     return false;

@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -148,14 +148,14 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
     private int prevStartOffset;
     private int prevEndOffset;

-    TermAttribute termAtt;
+    CharTermAttribute termAtt;
     PositionIncrementAttribute posIncrAtt;
     OffsetAttribute offsetAtt;
     TypeAttribute typeAtt;

     public TestFilter(TokenStream in) {
       super(in);
-      termAtt = addAttribute(TermAttribute.class);
+      termAtt = addAttribute(CharTermAttribute.class);
       posIncrAtt = addAttribute(PositionIncrementAttribute.class);
       offsetAtt = addAttribute(OffsetAttribute.class);
       typeAtt = addAttribute(TypeAttribute.class);
@@ -164,7 +164,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
     @Override
     public final boolean incrementToken() throws java.io.IOException {
       if (multiToken > 0) {
-        termAtt.setTermBuffer("multi"+(multiToken+1));
+        termAtt.setEmpty().append("multi"+(multiToken+1));
         offsetAtt.setOffset(prevStartOffset, prevEndOffset);
         typeAtt.setType(prevType);
         posIncrAtt.setPositionIncrement(0);
@@ -178,7 +178,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
         prevType = typeAtt.type();
         prevStartOffset = offsetAtt.startOffset();
         prevEndOffset = offsetAtt.endOffset();
-        String text = termAtt.term();
+        String text = termAtt.toString();
         if (text.equals("triplemulti")) {
           multiToken = 2;
           return true;
@@ -212,21 +212,21 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {

   private final class TestPosIncrementFilter extends TokenFilter {

-    TermAttribute termAtt;
+    CharTermAttribute termAtt;
     PositionIncrementAttribute posIncrAtt;

     public TestPosIncrementFilter(TokenStream in) {
       super(in);
-      termAtt = addAttribute(TermAttribute.class);
+      termAtt = addAttribute(CharTermAttribute.class);
       posIncrAtt = addAttribute(PositionIncrementAttribute.class);
     }

     @Override
     public final boolean incrementToken () throws java.io.IOException {
       while(input.incrementToken()) {
-        if (termAtt.term().equals("the")) {
+        if (termAtt.toString().equals("the")) {
           // stopword, do nothing
-        } else if (termAtt.term().equals("quick")) {
+        } else if (termAtt.toString().equals("quick")) {
           posIncrAtt.setPositionIncrement(2);
           return true;
         } else {

@@ -40,7 +40,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.DateField;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.Document;
@@ -82,7 +82,7 @@ public class TestQueryParser extends LocalizedTestCase {
   public static Analyzer qpAnalyzer = new QPTestAnalyzer();

   public static class QPTestFilter extends TokenFilter {
-    TermAttribute termAtt;
+    CharTermAttribute termAtt;
     OffsetAttribute offsetAtt;

     /**
@@ -91,7 +91,7 @@ public class TestQueryParser extends LocalizedTestCase {
      */
     public QPTestFilter(TokenStream in) {
       super(in);
-      termAtt = addAttribute(TermAttribute.class);
+      termAtt = addAttribute(CharTermAttribute.class);
       offsetAtt = addAttribute(OffsetAttribute.class);
     }

@@ -103,19 +103,19 @@ public class TestQueryParser extends LocalizedTestCase {
       if (inPhrase) {
         inPhrase = false;
         clearAttributes();
-        termAtt.setTermBuffer("phrase2");
+        termAtt.append("phrase2");
         offsetAtt.setOffset(savedStart, savedEnd);
         return true;
       } else
         while (input.incrementToken()) {
-          if (termAtt.term().equals("phrase")) {
+          if (termAtt.toString().equals("phrase")) {
             inPhrase = true;
             savedStart = offsetAtt.startOffset();
             savedEnd = offsetAtt.endOffset();
-            termAtt.setTermBuffer("phrase1");
+            termAtt.setEmpty().append("phrase1");
             offsetAtt.setOffset(savedStart, savedEnd);
             return true;
-          } else if (!termAtt.term().equals("stop"))
+          } else if (!termAtt.toString().equals("stop"))
             return true;
         }
       return false;

@@ -31,7 +31,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -74,7 +74,7 @@ public class TestPositionIncrement extends LuceneTestCase {
         private int i = 0;

         PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-        TermAttribute termAtt = addAttribute(TermAttribute.class);
+        CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
         OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

         @Override
@@ -82,7 +82,7 @@ public class TestPositionIncrement extends LuceneTestCase {
           if (i == TOKENS.length)
             return false;
           clearAttributes();
-          termAtt.setTermBuffer(TOKENS[i]);
+          termAtt.append(TOKENS[i]);
           offsetAtt.setOffset(i,i);
           posIncrAtt.setPositionIncrement(INCREMENTS[i]);
           i++;
@@ -347,7 +347,7 @@ class PayloadFilter extends TokenFilter {

   final PositionIncrementAttribute posIncrAttr;
   final PayloadAttribute payloadAttr;
-  final TermAttribute termAttr;
+  final CharTermAttribute termAttr;

   public PayloadFilter(TokenStream input, String fieldName) {
     super(input);
@@ -356,7 +356,7 @@ class PayloadFilter extends TokenFilter {
     i = 0;
     posIncrAttr = input.addAttribute(PositionIncrementAttribute.class);
     payloadAttr = input.addAttribute(PayloadAttribute.class);
-    termAttr = input.addAttribute(TermAttribute.class);
+    termAttr = input.addAttribute(CharTermAttribute.class);
   }

   @Override
@@ -372,7 +372,7 @@ class PayloadFilter extends TokenFilter {
     posIncrAttr.setPositionIncrement(posIncr);
     pos += posIncr;
     if (TestPositionIncrement.VERBOSE) {
-      System.out.println("term=" + termAttr.term() + " pos=" + pos);
+      System.out.println("term=" + termAttr + " pos=" + pos);
     }
     i++;
     return true;

@@ -27,7 +27,7 @@ import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

 import org.apache.lucene.util.LuceneTestCase;
 import java.io.IOException;
@@ -280,27 +280,25 @@ public class TestTermRangeQuery extends LuceneTestCase {

   private static class SingleCharTokenizer extends Tokenizer {
     char[] buffer = new char[1];
-    boolean done;
-    TermAttribute termAtt;
+    boolean done = false;
+    CharTermAttribute termAtt;

     public SingleCharTokenizer(Reader r) {
       super(r);
-      termAtt = addAttribute(TermAttribute.class);
+      termAtt = addAttribute(CharTermAttribute.class);
     }

     @Override
     public boolean incrementToken() throws IOException {
-      int count = input.read(buffer);
       if (done)
         return false;
       else {
+        int count = input.read(buffer);
         clearAttributes();
         done = true;
         if (count == 1) {
-          termAtt.termBuffer()[0] = buffer[0];
-          termAtt.setTermLength(1);
-        } else
-          termAtt.setTermLength(0);
+          termAtt.copyBuffer(buffer, 0, 1);
+        }
         return true;
       }
     }

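Note: besides the attribute swap, the SingleCharTokenizer hunk moves input.read() behind the done check, so the reader is no longer touched after the single token has been emitted, and collapses the termBuffer()[0] = ... / setTermLength(1) / setTermLength(0) sequence into one copyBuffer call. A sketch of the resulting shape (class name hypothetical, not part of this commit):

    import java.io.IOException;
    import java.io.Reader;

    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    final class FirstCharSketchTokenizer extends Tokenizer {
      private final char[] buffer = new char[1];
      private boolean done = false;
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

      FirstCharSketchTokenizer(Reader r) {
        super(r);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (done) {
          return false;                 // never reads past the first call
        }
        done = true;
        int count = input.read(buffer); // read only on the first invocation
        clearAttributes();
        if (count == 1) {
          termAtt.copyBuffer(buffer, 0, 1);
        }
        return true;                    // zero-length term if the reader was empty
      }
    }
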
@@ -29,7 +29,7 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.CorruptIndexException;
@@ -479,7 +479,7 @@ public class TestPayloadSpans extends LuceneTestCase {
     Set<String> nopayload = new HashSet<String>();
     int pos;
     PayloadAttribute payloadAtt;
-    TermAttribute termAtt;
+    CharTermAttribute termAtt;
     PositionIncrementAttribute posIncrAtt;

     public PayloadFilter(TokenStream input, String fieldName) {
@@ -490,7 +490,7 @@ public class TestPayloadSpans extends LuceneTestCase {
       entities.add("one");
       nopayload.add("nopayload");
       nopayload.add("np");
-      termAtt = addAttribute(TermAttribute.class);
+      termAtt = addAttribute(CharTermAttribute.class);
       posIncrAtt = addAttribute(PositionIncrementAttribute.class);
       payloadAtt = addAttribute(PayloadAttribute.class);
     }
@@ -498,7 +498,7 @@ public class TestPayloadSpans extends LuceneTestCase {
     @Override
     public boolean incrementToken() throws IOException {
       if (input.incrementToken()) {
-        String token = new String(termAtt.termBuffer(), 0, termAtt.termLength());
+        String token = termAtt.toString();

         if (!nopayload.contains(token)) {
           if (entities.contains(token)) {