SOLR-1876: convert all Solr tokenstreams to CharTermAttribute, make all non-final TokenStreams/Analyzers final

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@932862 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-04-11 10:31:34 +00:00
parent 3ae65e769c
commit 9a43d0ee41
44 changed files with 237 additions and 232 deletions

View File

@ -59,7 +59,8 @@ Upgrading from Solr 1.4
"terms" container is a map instead of a named list. This affects
response formats like JSON, but not XML. (yonik)
* SOLR-1876: All Analyzers and TokenStreams are now final to enforce
the decorator pattern. (rmuir, uschindler)
Detailed Change List
----------------------

View File

@ -455,6 +455,10 @@
<formatter classname="${junit.details.formatter}" usefile="false" if="junit.details"/>
<classpath refid="test.run.classpath"/>
<assertions>
<enable package="org.apache.lucene"/>
<enable package="org.apache.solr"/>
</assertions>
<formatter type="${junit.formatter}"/>
<batchtest fork="yes" todir="${junit.output.dir}" if="runall">
<fileset dir="src/test" includes="**/Test@{pattern}*.java,**/@{pattern}*Test.java"/>

View File

@ -139,6 +139,10 @@
>
<formatter type="brief" usefile="false" if="junit.details"/>
<classpath refid="test.classpath"/>
<assertions>
<enable package="org.apache.lucene"/>
<enable package="org.apache.solr"/>
</assertions>
<formatter type="xml"/>
<batchtest fork="yes" todir="${junit.output.dir}" unless="testcase">
<fileset dir="src/test/java" includes="${junit.includes}">

View File

@ -162,6 +162,10 @@
>
<formatter type="brief" usefile="false" if="junit.details"/>
<classpath refid="test.extras.classpath"/>
<assertions>
<enable package="org.apache.lucene"/>
<enable package="org.apache.solr"/>
</assertions>
<formatter type="xml"/>
<batchtest fork="yes" todir="${junit.output.dir}" unless="testcase">
<fileset dir="src/extras/test/java" includes="${junit.includes}"/>

View File

@ -88,6 +88,10 @@
>
<formatter type="brief" usefile="false" if="junit.details"/>
<classpath refid="test.classpath"/>
<assertions>
<enable package="org.apache.lucene"/>
<enable package="org.apache.solr"/>
</assertions>
<formatter type="xml"/>
<batchtest fork="yes" todir="${junit.output.dir}" unless="testcase">
<fileset dir="src/test/java" includes="${junit.includes}"/>

View File

@ -87,6 +87,10 @@
<sysproperty key="java.util.logging.config.file" value="${common-solr.dir}/testlogging.properties"/>
<formatter type="brief" usefile="false"/>
<classpath refid="test.classpath"/>
<assertions>
<enable package="org.apache.lucene"/>
<enable package="org.apache.solr"/>
</assertions>
<!--<formatter type="xml" usefile="false"/>-->
<batchtest fork="yes" todir="${junit.output.dir}" unless="testcase">
<fileset dir="src/test" includes="${junit.includes}"/>

View File

@ -20,11 +20,11 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource; // javadoc @link
@ -73,7 +73,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
private final LinkedList<Token> inQueue = new LinkedList<Token>();
private final LinkedList<Token> outQueue = new LinkedList<Token>();
private final TermAttribute termAtt = addAttribute(TermAttribute.class);
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
@ -150,7 +150,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
return null;
} else {
Token token = new Token();
token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setType(typeAtt.type());
token.setFlags(flagsAtt.getFlags());
@ -163,7 +163,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
/** old api emulation for back compat */
private boolean writeToken(Token token) throws IOException {
clearAttributes();
termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
typeAtt.setType(token.type());
flagsAtt.setFlags(token.getFlags());

View File

@ -18,7 +18,7 @@
package org.apache.solr.analysis;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.IOException;
import java.util.ArrayList;
@ -188,22 +188,21 @@ public class CapitalizationFilterFactory extends BaseTokenFilterFactory {
* <p/>
* This is package protected since it is not useful without the Factory
*/
class CapitalizationFilter extends TokenFilter {
final class CapitalizationFilter extends TokenFilter {
private final CapitalizationFilterFactory factory;
private final TermAttribute termAtt;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public CapitalizationFilter(TokenStream in, final CapitalizationFilterFactory factory) {
super(in);
this.factory = factory;
this.termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) return false;
char[] termBuffer = termAtt.termBuffer();
int termBufferLength = termAtt.termLength();
char[] termBuffer = termAtt.buffer();
int termBufferLength = termAtt.length();
char[] backup = null;
if (factory.maxWordCount < CapitalizationFilterFactory.DEFAULT_MAX_WORD_COUNT) {
//make a backup in case we exceed the word count
@ -232,7 +231,7 @@ class CapitalizationFilter extends TokenFilter {
}
if (wordCount > factory.maxWordCount) {
termAtt.setTermBuffer(backup, 0, termBufferLength);
termAtt.copyBuffer(backup, 0, termBufferLength);
}
}

View File

@ -18,7 +18,7 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
@ -52,7 +52,7 @@ public final class CommonGramsFilter extends TokenFilter {
private final StringBuilder buffer = new StringBuilder();
private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
@ -231,7 +231,7 @@ public final class CommonGramsFilter extends TokenFilter {
* @return {@code true} if the current token is a common term, {@code false} otherwise
*/
private boolean isCommon() {
return commonWords != null && commonWords.contains(termAttribute.termBuffer(), 0, termAttribute.termLength());
return commonWords != null && commonWords.contains(termAttribute.buffer(), 0, termAttribute.length());
}
/**
@ -239,7 +239,7 @@ public final class CommonGramsFilter extends TokenFilter {
*/
private void saveTermBuffer() {
buffer.setLength(0);
buffer.append(termAttribute.termBuffer(), 0, termAttribute.termLength());
buffer.append(termAttribute.buffer(), 0, termAttribute.length());
buffer.append(SEPARATOR);
lastStartOffset = offsetAttribute.startOffset();
lastWasCommon = isCommon();
@ -249,19 +249,19 @@ public final class CommonGramsFilter extends TokenFilter {
* Constructs a compound token.
*/
private void gramToken() {
buffer.append(termAttribute.termBuffer(), 0, termAttribute.termLength());
buffer.append(termAttribute.buffer(), 0, termAttribute.length());
int endOffset = offsetAttribute.endOffset();
clearAttributes();
int length = buffer.length();
char termText[] = termAttribute.termBuffer();
char termText[] = termAttribute.buffer();
if (length > termText.length) {
termText = termAttribute.resizeTermBuffer(length);
termText = termAttribute.resizeBuffer(length);
}
buffer.getChars(0, length, termText, 0);
termAttribute.setTermLength(length);
termAttribute.setLength(length);
posIncAttribute.setPositionIncrement(0);
offsetAttribute.setOffset(lastStartOffset, endOffset);
typeAttribute.setType(GRAM_TYPE);

View File

@ -22,25 +22,23 @@ import java.util.LinkedList;
import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
public class DoubleMetaphoneFilter extends TokenFilter {
public final class DoubleMetaphoneFilter extends TokenFilter {
private static final String TOKEN_TYPE = "DoubleMetaphone";
private final LinkedList<State> remainingTokens = new LinkedList<State>();
private final DoubleMetaphone encoder = new DoubleMetaphone();
private final boolean inject;
private final TermAttribute termAtt;
private final PositionIncrementAttribute posAtt;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
protected DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
super(input);
this.encoder.setMaxCodeLen(maxCodeLength);
this.inject = inject;
this.termAtt = addAttribute(TermAttribute.class);
this.posAtt = addAttribute(PositionIncrementAttribute.class);
}
@Override
@ -55,12 +53,12 @@ public class DoubleMetaphoneFilter extends TokenFilter {
if (!input.incrementToken()) return false;
int len = termAtt.termLength();
int len = termAtt.length();
if (len==0) return true; // pass through zero length terms
int firstAlternativeIncrement = inject ? 0 : posAtt.getPositionIncrement();
String v = new String(termAtt.termBuffer(), 0, len);
String v = termAtt.toString();
String primaryPhoneticValue = encoder.doubleMetaphone(v);
String alternatePhoneticValue = encoder.doubleMetaphone(v, true);
@ -74,7 +72,7 @@ public class DoubleMetaphoneFilter extends TokenFilter {
}
posAtt.setPositionIncrement( firstAlternativeIncrement );
firstAlternativeIncrement = 0;
termAtt.setTermBuffer(primaryPhoneticValue);
termAtt.setEmpty().append(primaryPhoneticValue);
saveState = true;
}
@ -86,7 +84,7 @@ public class DoubleMetaphoneFilter extends TokenFilter {
saveState = false;
}
posAtt.setPositionIncrement( firstAlternativeIncrement );
termAtt.setTermBuffer(alternatePhoneticValue);
termAtt.setEmpty().append(alternatePhoneticValue);
saveState = true;
}

View File

@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* When the plain text is extracted from documents, we will often have many words hyphenated and broken into
@ -54,7 +54,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
*/
public final class HyphenatedWordsFilter extends TokenFilter {
private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
private final StringBuilder hyphenated = new StringBuilder();
@ -75,8 +75,8 @@ public final class HyphenatedWordsFilter extends TokenFilter {
@Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
char[] term = termAttribute.termBuffer();
int termLength = termAttribute.termLength();
char[] term = termAttribute.buffer();
int termLength = termAttribute.length();
if (termLength > 0 && term[termLength - 1] == '-') {
// a hyphenated word
@ -128,14 +128,14 @@ public final class HyphenatedWordsFilter extends TokenFilter {
restoreState(savedState);
savedState = null;
char term[] = termAttribute.termBuffer();
char term[] = termAttribute.buffer();
int length = hyphenated.length();
if (length > termAttribute.termLength()) {
term = termAttribute.resizeTermBuffer(length);
if (length > termAttribute.length()) {
term = termAttribute.resizeBuffer(length);
}
hyphenated.getChars(0, length, term, 0);
termAttribute.setTermLength(length);
termAttribute.setLength(length);
offsetAttribute.setOffset(offsetAttribute.startOffset(), endOffset);
hyphenated.setLength(0);
}

View File

@ -20,7 +20,7 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.IOException;
import java.util.Set;
@ -34,7 +34,7 @@ import java.util.Set;
*/
public final class KeepWordFilter extends TokenFilter {
private final CharArraySet words;
private final TermAttribute termAtt;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/** @deprecated Use {@link #KeepWordFilter(TokenStream, Set, boolean)} instead */
@Deprecated
@ -47,13 +47,12 @@ public final class KeepWordFilter extends TokenFilter {
public KeepWordFilter(TokenStream in, CharArraySet words) {
super(in);
this.words = words;
this.termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
if (words.contains(termAtt.termBuffer(), 0, termAtt.termLength())) return true;
if (words.contains(termAtt.buffer(), 0, termAtt.length())) return true;
}
return false;
}

View File

@ -42,7 +42,7 @@ public final class PatternReplaceFilter extends TokenFilter {
private final Pattern p;
private final String replacement;
private final boolean all;
private final CharTermAttribute termAtt;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final Matcher m;
/**
@ -64,7 +64,6 @@ public final class PatternReplaceFilter extends TokenFilter {
this.p=p;
this.replacement = (null == replacement) ? "" : replacement;
this.all=all;
this.termAtt = addAttribute(CharTermAttribute.class);
this.m = p.matcher(termAtt);
}

View File

@ -22,7 +22,7 @@ import java.io.Reader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.commons.io.IOUtils;
@ -56,7 +56,7 @@ import org.apache.commons.io.IOUtils;
*/
public final class PatternTokenizer extends Tokenizer {
private final TermAttribute termAtt = addAttribute(TermAttribute.class);
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private String str;
@ -86,7 +86,7 @@ public final class PatternTokenizer extends Tokenizer {
while (matcher.find()) {
final String match = matcher.group(group);
if (match.length() == 0) continue;
termAtt.setTermBuffer(match);
termAtt.setEmpty().append(match);
index = matcher.start(group);
offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.end(group)));
return true;
@ -101,7 +101,7 @@ public final class PatternTokenizer extends Tokenizer {
while (matcher.find()) {
if (matcher.start() - index > 0) {
// found a non-zero-length token
termAtt.setTermBuffer(str, index, matcher.start() - index);
termAtt.setEmpty().append(str, index, matcher.start());
offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.start()));
index = matcher.end();
return true;
@ -115,7 +115,7 @@ public final class PatternTokenizer extends Tokenizer {
return false;
}
termAtt.setTermBuffer(str, index, str.length() - index);
termAtt.setEmpty().append(str, index, str.length());
offsetAtt.setOffset(correctOffset(index), correctOffset(str.length()));
index = Integer.MAX_VALUE; // mark exhausted
return true;

View File

@ -20,7 +20,7 @@ package org.apache.solr.analysis;
import org.apache.commons.codec.Encoder;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import java.io.IOException;
@ -31,23 +31,21 @@ import java.io.IOException;
*
* @version $Id$
*/
public class PhoneticFilter extends TokenFilter
public final class PhoneticFilter extends TokenFilter
{
protected boolean inject = true;
protected Encoder encoder = null;
protected String name = null;
protected State save = null;
private final TermAttribute termAtt;
private final PositionIncrementAttribute posAtt;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
public PhoneticFilter(TokenStream in, Encoder encoder, String name, boolean inject) {
super(in);
this.encoder = encoder;
this.name = name;
this.inject = inject;
this.termAtt = addAttribute(TermAttribute.class);
this.posAtt = addAttribute(PositionIncrementAttribute.class);
this.inject = inject;
}
@Override
@ -62,9 +60,9 @@ public class PhoneticFilter extends TokenFilter
if (!input.incrementToken()) return false;
// pass through zero-length terms
if (termAtt.termLength()==0) return true;
if (termAtt.length() == 0) return true;
String value = termAtt.term();
String value = termAtt.toString();
String phonetic = null;
try {
String v = encoder.encode(value).toString();
@ -75,7 +73,7 @@ public class PhoneticFilter extends TokenFilter
if (!inject) {
// just modify this token
termAtt.setTermBuffer(phonetic);
termAtt.setEmpty().append(phonetic);
return true;
}
@ -88,7 +86,7 @@ public class PhoneticFilter extends TokenFilter
save = captureState();
posAtt.setPositionIncrement(origOffset);
termAtt.setTermBuffer(phonetic);
termAtt.setEmpty().append(phonetic);
return true;
}

View File

@ -20,8 +20,8 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
import java.io.IOException;
@ -31,7 +31,7 @@ import java.io.IOException;
*/
public final class RemoveDuplicatesTokenFilter extends TokenFilter {
private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
// use a fixed version, as we don't care about case sensitivity.
@ -52,8 +52,8 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter {
@Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
final char term[] = termAttribute.termBuffer();
final int length = termAttribute.termLength();
final char term[] = termAttribute.buffer();
final int length = termAttribute.length();
final int posIncrement = posIncAttribute.getPositionIncrement();
if (posIncrement > 0) {

View File

@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class produces a special form of reversed tokens, suitable for
@ -35,17 +35,17 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
* <code>withOriginal == true</code>, which proportionally increases the size
* of postings and term dictionary in the index.
*/
public class ReversedWildcardFilter extends TokenFilter {
public final class ReversedWildcardFilter extends TokenFilter {
private boolean withOriginal;
private char markerChar;
private State save;
private TermAttribute termAtt;
private CharTermAttribute termAtt;
private PositionIncrementAttribute posAtt;
protected ReversedWildcardFilter(TokenStream input, boolean withOriginal, char markerChar) {
super(input);
this.termAtt = addAttribute(TermAttribute.class);
this.termAtt = addAttribute(CharTermAttribute.class);
this.posAtt = addAttribute(PositionIncrementAttribute.class);
this.withOriginal = withOriginal;
this.markerChar = markerChar;
@ -63,19 +63,19 @@ public class ReversedWildcardFilter extends TokenFilter {
if (!input.incrementToken()) return false;
// pass through zero-length terms
int oldLen = termAtt.termLength();
int oldLen = termAtt.length();
if (oldLen ==0) return true;
int origOffset = posAtt.getPositionIncrement();
if (withOriginal == true){
posAtt.setPositionIncrement(0);
save = captureState();
}
char [] buffer = termAtt.resizeTermBuffer(oldLen + 1);
char [] buffer = termAtt.resizeBuffer(oldLen + 1);
buffer[oldLen] = markerChar;
reverse(buffer, 0, oldLen + 1);
posAtt.setPositionIncrement(origOffset);
termAtt.setTermBuffer(buffer, 0, oldLen +1);
termAtt.copyBuffer(buffer, 0, oldLen +1);
return true;
}

View File

@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
@ -41,7 +41,7 @@ import java.util.LinkedList;
*
* @version $Id$
*/
public class SynonymFilter extends TokenFilter {
public final class SynonymFilter extends TokenFilter {
private final SynonymMap map; // Map<String, SynonymMap>
private Iterator<AttributeSource> replacement; // iterator over generated tokens
@ -50,7 +50,7 @@ public class SynonymFilter extends TokenFilter {
super(in);
this.map = map;
// just ensuring these attributes exist...
addAttribute(TermAttribute.class);
addAttribute(CharTermAttribute.class);
addAttribute(PositionIncrementAttribute.class);
addAttribute(OffsetAttribute.class);
addAttribute(TypeAttribute.class);
@ -87,8 +87,8 @@ public class SynonymFilter extends TokenFilter {
// common case fast-path of first token not matching anything
AttributeSource firstTok = nextTok();
if (firstTok == null) return false;
TermAttribute termAtt = firstTok.addAttribute(TermAttribute.class);
SynonymMap result = map.submap!=null ? map.submap.get(termAtt.termBuffer(), 0, termAtt.termLength()) : null;
CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
SynonymMap result = map.submap!=null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;
if (result == null) {
copy(this, firstTok);
return true;
@ -128,14 +128,14 @@ public class SynonymFilter extends TokenFilter {
for (int i=0; i<result.synonyms.length; i++) {
Token repTok = result.synonyms[i];
AttributeSource newTok = firstTok.cloneAttributes();
TermAttribute newTermAtt = newTok.addAttribute(TermAttribute.class);
CharTermAttribute newTermAtt = newTok.addAttribute(CharTermAttribute.class);
OffsetAttribute newOffsetAtt = newTok.addAttribute(OffsetAttribute.class);
PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class);
newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
newTermAtt.setTermBuffer(repTok.termBuffer(), 0, repTok.termLength());
newTermAtt.copyBuffer(repTok.termBuffer(), 0, repTok.termLength());
repPos += repTok.getPositionIncrement();
if (i==0) repPos=origPos; // make position of first token equal to original
@ -215,8 +215,8 @@ public class SynonymFilter extends TokenFilter {
if (tok == this)
tok = cloneAttributes();
// check for positionIncrement!=1? if>1, should not match, if==0, check multiple at this level?
TermAttribute termAtt = tok.getAttribute(TermAttribute.class);
SynonymMap subMap = map.submap.get(termAtt.termBuffer(), 0, termAtt.termLength());
CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
SynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());
if (subMap != null) {
// recurse

View File

@ -18,7 +18,7 @@
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.plugin.ResourceLoaderAware;
@ -135,11 +135,10 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
TokenStream ts = loadTokenizer(tokFactory, reader);
List<String> tokList = new ArrayList<String>();
try {
TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()){
String text = new String(termAtt.termBuffer(), 0, termAtt.termLength());
if( text.length() > 0 )
tokList.add( text );
if( termAtt.length() > 0 )
tokList.add( termAtt.toString() );
}
} catch (IOException e) {
throw new RuntimeException(e);

View File

@ -32,7 +32,7 @@ import java.io.Reader;
// An analyzer that uses a tokenizer and a list of token filters to
// create a TokenStream.
//
public class TokenizerChain extends SolrAnalyzer {
public final class TokenizerChain extends SolrAnalyzer {
final private CharFilterFactory[] charFilters;
final private TokenizerFactory tokenizer;
final private TokenFilterFactory[] filters;

View File

@ -51,7 +51,7 @@ public class TrieTokenizerFactory extends BaseTokenizerFactory {
}
}
class TrieTokenizer extends Tokenizer {
final class TrieTokenizer extends Tokenizer {
protected static final DateField dateField = new DateField();
protected final int precisionStep;
protected final TrieTypes type;

View File

@ -19,7 +19,7 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import java.io.IOException;
@ -32,24 +32,21 @@ import java.io.IOException;
public final class TrimFilter extends TokenFilter {
final boolean updateOffsets;
private final TermAttribute termAtt;
private final OffsetAttribute offsetAtt;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public TrimFilter(TokenStream in, boolean updateOffsets) {
super(in);
this.updateOffsets = updateOffsets;
this.termAtt = addAttribute(TermAttribute.class);
this.offsetAtt = addAttribute(OffsetAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) return false;
char[] termBuffer = termAtt.termBuffer();
int len = termAtt.termLength();
char[] termBuffer = termAtt.buffer();
int len = termAtt.length();
// TODO: Is this the right behavior, or should we return false? Currently " " returns true, so I think
// a zero-length term should also return true.
if (len == 0){
@ -69,9 +66,9 @@ public final class TrimFilter extends TokenFilter {
}
if (start > 0 || end < len) {
if (start < end) {
termAtt.setTermBuffer(termBuffer, start, (end - start));
termAtt.copyBuffer(termBuffer, start, (end - start));
} else {
termAtt.setTermLength(0);
termAtt.setEmpty();
}
if (updateOffsets) {
int newStart = offsetAtt.startOffset()+start;

View File

@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
@ -120,7 +120,7 @@ final class WordDelimiterFilter extends TokenFilter {
*/
final CharArraySet protWords;
private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
@ -338,8 +338,8 @@ final class WordDelimiterFilter extends TokenFilter {
return false;
}
int termLength = termAttribute.termLength();
char[] termBuffer = termAttribute.termBuffer();
int termLength = termAttribute.length();
char[] termBuffer = termAttribute.buffer();
accumPosInc += posIncAttribute.getPositionIncrement();
@ -462,14 +462,14 @@ final class WordDelimiterFilter extends TokenFilter {
savedStartOffset = offsetAttribute.startOffset();
savedEndOffset = offsetAttribute.endOffset();
// if length by start + end offsets doesn't match the term text then assume this is a synonym and don't adjust the offsets.
hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.termLength());
hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.length());
savedType = typeAttribute.type();
if (savedBuffer.length < termAttribute.termLength()) {
savedBuffer = new char[ArrayUtil.oversize(termAttribute.termLength(), RamUsageEstimator.NUM_BYTES_CHAR)];
if (savedBuffer.length < termAttribute.length()) {
savedBuffer = new char[ArrayUtil.oversize(termAttribute.length(), RamUsageEstimator.NUM_BYTES_CHAR)];
}
System.arraycopy(termAttribute.termBuffer(), 0, savedBuffer, 0, termAttribute.termLength());
System.arraycopy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length());
iterator.text = savedBuffer;
hasSavedState = true;
@ -531,7 +531,7 @@ final class WordDelimiterFilter extends TokenFilter {
*/
private void generatePart(boolean isSingleWord) {
clearAttributes();
termAttribute.setTermBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
int startOffSet = (isSingleWord || !hasIllegalOffsets) ? savedStartOffset + iterator.current : savedStartOffset;
int endOffSet = (hasIllegalOffsets) ? savedEndOffset : savedStartOffset + iterator.end;
@ -636,13 +636,13 @@ final class WordDelimiterFilter extends TokenFilter {
*/
void write() {
clearAttributes();
if (termAttribute.termLength() < buffer.length()) {
termAttribute.resizeTermBuffer(buffer.length());
if (termAttribute.length() < buffer.length()) {
termAttribute.resizeBuffer(buffer.length());
}
char termbuffer[] = termAttribute.termBuffer();
char termbuffer[] = termAttribute.buffer();
buffer.getChars(0, buffer.length(), termbuffer, 0);
termAttribute.setTermLength(buffer.length());
termAttribute.setLength(buffer.length());
if (hasIllegalOffsets) {
offsetAttribute.setOffset(savedStartOffset, savedEndOffset);

View File

@ -18,7 +18,6 @@ package org.apache.solr.handler;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.BytesRef;
@ -135,10 +134,10 @@ public class AnalysisRequestHandler extends RequestHandlerBase {
// outer is namedList since order of tokens is important
NamedList<NamedList<Object>> tokens = new NamedList<NamedList<Object>>();
// TODO: support custom attributes
TermAttribute termAtt = null;
CharTermAttribute termAtt = null;
TermToBytesRefAttribute bytesAtt = null;
if (tstream.hasAttribute(TermAttribute.class)) {
termAtt = tstream.getAttribute(TermAttribute.class);
if (tstream.hasAttribute(CharTermAttribute.class)) {
termAtt = tstream.getAttribute(CharTermAttribute.class);
} else if (tstream.hasAttribute(TermToBytesRefAttribute.class)) {
bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
}
@ -151,7 +150,7 @@ public class AnalysisRequestHandler extends RequestHandlerBase {
NamedList<Object> token = new SimpleOrderedMap<Object>();
tokens.add("token", token);
if (termAtt != null) {
token.add("value", termAtt.term());
token.add("value", termAtt.toString());
}
if (bytesAtt != null) {
bytesAtt.toBytesRef(bytes);

View File

@ -145,10 +145,10 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
List<Token> tokens = new ArrayList<Token>();
// TODO change this API to support custom attributes
TermAttribute termAtt = null;
CharTermAttribute termAtt = null;
TermToBytesRefAttribute bytesAtt = null;
if (tokenStream.hasAttribute(TermAttribute.class)) {
termAtt = tokenStream.getAttribute(TermAttribute.class);
if (tokenStream.hasAttribute(CharTermAttribute.class)) {
termAtt = tokenStream.getAttribute(CharTermAttribute.class);
} else if (tokenStream.hasAttribute(TermToBytesRefAttribute.class)) {
bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
}
@ -163,7 +163,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
while (tokenStream.incrementToken()) {
Token token = new Token();
if (termAtt != null) {
token.setTermBuffer(termAtt.term());
token.setTermBuffer(termAtt.toString());
}
if (bytesAtt != null) {
bytesAtt.toBytesRef(bytes);
@ -259,12 +259,12 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
* TokenStream that iterates over a list of pre-existing Tokens
*/
// TODO refactor to support custom attributes
protected static class ListBasedTokenStream extends TokenStream {
protected final static class ListBasedTokenStream extends TokenStream {
private final List<Token> tokens;
private Iterator<Token> tokenIterator;
private final TermAttribute termAtt = (TermAttribute)
addAttribute(TermAttribute.class);
private final CharTermAttribute termAtt = (CharTermAttribute)
addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = (OffsetAttribute)
addAttribute(OffsetAttribute.class);
private final TypeAttribute typeAtt = (TypeAttribute)
@ -292,7 +292,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
public boolean incrementToken() throws IOException {
if (tokenIterator.hasNext()) {
Token next = tokenIterator.next();
termAtt.setTermBuffer(next.termBuffer(), 0, next.termLength());
termAtt.copyBuffer(next.termBuffer(), 0, next.termLength());
typeAtt.setType(next.type());
offsetAtt.setOffset(next.startOffset(), next.endOffset());
flagsAtt.setFlags(next.getFlags());

View File

@ -39,7 +39,7 @@ import javax.xml.xpath.XPathFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
@ -298,9 +298,9 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
TokenStream tokens = analyzer.reusableTokenStream( "", new StringReader( query ) );
tokens.reset();
TermAttribute termAtt = (TermAttribute) tokens.addAttribute(TermAttribute.class);
CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class);
while( tokens.incrementToken() ) {
norm.append( termAtt.termBuffer(), 0, termAtt.termLength() );
norm.append( termAtt.buffer(), 0, termAtt.length() );
}
return norm.toString();
}

View File

@ -37,7 +37,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.common.SolrException;
@ -373,16 +373,16 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
ts.reset();
// TODO: support custom attributes
TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
OffsetAttribute offsetAtt = (OffsetAttribute) ts.addAttribute(OffsetAttribute.class);
TypeAttribute typeAtt = (TypeAttribute) ts.addAttribute(TypeAttribute.class);
FlagsAttribute flagsAtt = (FlagsAttribute) ts.addAttribute(FlagsAttribute.class);
PayloadAttribute payloadAtt = (PayloadAttribute) ts.addAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts.addAttribute(PositionIncrementAttribute.class);
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
while (ts.incrementToken()){
Token token = new Token();
token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setType(typeAtt.type());
token.setFlags(flagsAtt.getFlags());

View File

@ -512,7 +512,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
* This is meant to work around fickleness in the highlighter only. It
* can mess up token positions and should not be used for indexing or querying.
*/
class TokenOrderingFilter extends TokenFilter {
final class TokenOrderingFilter extends TokenFilter {
private final int windowSize;
private final LinkedList<OrderedToken> queue = new LinkedList<OrderedToken>();
private boolean done=false;
@ -586,7 +586,7 @@ class TermOffsetsTokenStream {
return new MultiValuedStream(length);
}
class MultiValuedStream extends TokenStream {
final class MultiValuedStream extends TokenStream {
private final int length;
OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);

View File

@ -20,11 +20,9 @@ package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.solr.search.function.ValueSource;
import org.apache.solr.search.function.OrdFieldSource;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Fieldable;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.response.XMLWriter;
@ -59,7 +57,7 @@ public class BoolField extends FieldType {
protected final static Analyzer boolAnalyzer = new SolrAnalyzer() {
public TokenStreamInfo getStream(String fieldName, Reader reader) {
Tokenizer tokenizer = new Tokenizer(reader) {
final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
boolean done = false;
@Override
@ -75,7 +73,7 @@ public class BoolField extends FieldType {
done = true;
int ch = input.read();
if (ch==-1) return false;
termAtt.setTermBuffer(
termAtt.copyBuffer(
((ch=='t' || ch=='T' || ch=='1') ? TRUE_TOKEN : FALSE_TOKEN)
,0,1);
return true;

View File

@ -21,7 +21,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Query;
@ -369,7 +369,7 @@ public abstract class FieldType extends FieldProperties {
* Default analyzer for types that produce only a single verbatim token.
* The maximum number of chars to be read must be specified.
*/
protected class DefaultAnalyzer extends SolrAnalyzer {
protected final class DefaultAnalyzer extends SolrAnalyzer {
final int maxChars;
DefaultAnalyzer(int maxChars) {
@ -379,15 +379,15 @@ public abstract class FieldType extends FieldProperties {
public TokenStreamInfo getStream(String fieldName, Reader reader) {
Tokenizer ts = new Tokenizer(reader) {
final char[] cbuf = new char[maxChars];
final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
final OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@Override
public boolean incrementToken() throws IOException {
clearAttributes();
int n = input.read(cbuf,0,maxChars);
if (n<=0) return false;
String s = toInternal(new String(cbuf,0,n));
termAtt.setTermBuffer(s);
termAtt.setEmpty().append(s);
offsetAtt.setOffset(correctOffset(0),correctOffset(n));
return true;
}

View File

@ -27,7 +27,7 @@ import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer;
@ -88,7 +88,7 @@ public class TextField extends FieldType {
source = analyzer.tokenStream(field, new StringReader(queryText));
}
CachingTokenFilter buffer = new CachingTokenFilter(source);
TermAttribute termAtt = null;
CharTermAttribute termAtt = null;
PositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
@ -100,11 +100,11 @@ public class TextField extends FieldType {
// success==false if we hit an exception
}
if (success) {
if (buffer.hasAttribute(TermAttribute.class)) {
termAtt = (TermAttribute) buffer.getAttribute(TermAttribute.class);
if (buffer.hasAttribute(CharTermAttribute.class)) {
termAtt = buffer.getAttribute(CharTermAttribute.class);
}
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
posIncrAtt = (PositionIncrementAttribute) buffer.getAttribute(PositionIncrementAttribute.class);
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
}
}
@ -147,7 +147,7 @@ public class TextField extends FieldType {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@ -164,7 +164,7 @@ public class TextField extends FieldType {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
}
@ -188,7 +188,7 @@ public class TextField extends FieldType {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
@ -229,7 +229,7 @@ public class TextField extends FieldType {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
term = termAtt.term();
term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}

View File

@ -1043,7 +1043,7 @@ class ExtendedDismaxQParser extends QParser {
}
class ExtendedAnalyzer extends Analyzer {
final class ExtendedAnalyzer extends Analyzer {
final Map<String, Analyzer> map = new HashMap<String, Analyzer>();
final QParser parser;
final Analyzer queryAnalyzer;

View File

@ -30,7 +30,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
@ -105,15 +105,15 @@ public class SpellingQueryConverter extends QueryConverter {
try {
stream = analyzer.reusableTokenStream("", new StringReader(word));
// TODO: support custom attributes
TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
FlagsAttribute flagsAtt = (FlagsAttribute) stream.addAttribute(FlagsAttribute.class);
TypeAttribute typeAtt = (TypeAttribute) stream.addAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = (PayloadAttribute) stream.addAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) stream.addAttribute(PositionIncrementAttribute.class);
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
stream.reset();
while (stream.incrementToken()) {
Token token = new Token();
token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
token.setStartOffset(matcher.start());
token.setEndOffset(matcher.end());
token.setFlags(flagsAtt.getFlags());

View File

@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Tests CommonGramsFilter and CommonGramsQueryFilter
@ -38,20 +38,20 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
TermAttribute term = cgf.addAttribute(TermAttribute.class);
CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
assertTrue(cgf.incrementToken());
assertEquals("How", term.term());
assertEquals("How", term.toString());
assertTrue(cgf.incrementToken());
assertEquals("How_the", term.term());
assertEquals("How_the", term.toString());
assertTrue(cgf.incrementToken());
assertEquals("the", term.term());
assertEquals("the", term.toString());
assertTrue(cgf.incrementToken());
assertEquals("the_s", term.term());
assertEquals("the_s", term.toString());
wt.reset(new StringReader(input));
cgf.reset();
assertTrue(cgf.incrementToken());
assertEquals("How", term.term());
assertEquals("How", term.toString());
}
public void testQueryReset() throws Exception {
@ -60,16 +60,16 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
TermAttribute term = wt.addAttribute(TermAttribute.class);
CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
assertTrue(nsf.incrementToken());
assertEquals("How_the", term.term());
assertEquals("How_the", term.toString());
assertTrue(nsf.incrementToken());
assertEquals("the_s", term.term());
assertEquals("the_s", term.toString());
wt.reset(new StringReader(input));
nsf.reset();
assertTrue(nsf.incrementToken());
assertEquals("How_the", term.term());
assertEquals("How_the", term.toString());
}
/**

View File

@ -22,7 +22,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase {
@ -59,12 +59,12 @@ public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase {
TokenStream inputStream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international"));
TokenStream filteredStream = factory.create(inputStream);
TermAttribute termAtt = filteredStream.addAttribute(TermAttribute.class);
CharTermAttribute termAtt = filteredStream.addAttribute(CharTermAttribute.class);
assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass());
assertTrue(filteredStream.incrementToken());
assertEquals(13, termAtt.termLength());
assertEquals("international", termAtt.term());
assertEquals(13, termAtt.length());
assertEquals("international", termAtt.toString());
filteredStream.reset();
// ensure there are no more tokens, such as ANTRNXNL

View File

@ -21,7 +21,7 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.IOException;
import java.io.StringReader;
@ -75,18 +75,18 @@ public class TestBufferedTokenStream extends BaseTokenTestCase {
final String input = "How now A B brown A cow B like A B thing?";
Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
TokenStream ts = new AB_AAB_Stream(tokenizer);
TermAttribute term = ts.addAttribute(TermAttribute.class);
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
assertTrue(ts.incrementToken());
assertEquals("How", term.term());
assertEquals("How", term.toString());
assertTrue(ts.incrementToken());
assertEquals("now", term.term());
assertEquals("now", term.toString());
assertTrue(ts.incrementToken());
assertEquals("A", term.term());
assertEquals("A", term.toString());
// Reset back to the start of the input;
// if reset() does not work correctly, previously buffered tokens will remain.
tokenizer.reset(new StringReader(input));
ts.reset();
assertTrue(ts.incrementToken());
assertEquals("How", term.term());
assertEquals("How", term.toString());
}
}

View File

@ -30,7 +30,7 @@ import java.util.Map;
import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.solr.common.ResourceLoader;
public class TestCollationKeyFilterFactory extends BaseTokenTestCase {
@ -177,13 +177,13 @@ public class TestCollationKeyFilterFactory extends BaseTokenTestCase {
private void assertCollatesToSame(TokenStream stream1, TokenStream stream2)
throws IOException {
TermAttribute term1 = stream1
.addAttribute(TermAttribute.class);
TermAttribute term2 = stream2
.addAttribute(TermAttribute.class);
CharTermAttribute term1 = stream1
.addAttribute(CharTermAttribute.class);
CharTermAttribute term2 = stream2
.addAttribute(CharTermAttribute.class);
assertTrue(stream1.incrementToken());
assertTrue(stream2.incrementToken());
assertEquals(term1.term(), term2.term());
assertEquals(term1.toString(), term2.toString());
assertFalse(stream1.incrementToken());
assertFalse(stream2.incrementToken());
}

View File

@ -29,7 +29,7 @@ import org.apache.lucene.analysis.CharStream;
import org.apache.lucene.analysis.MappingCharFilter;
import org.apache.lucene.analysis.NormalizeCharMap;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
public class TestPatternTokenizerFactory extends BaseTokenTestCase
{
@ -117,17 +117,17 @@ public class TestPatternTokenizerFactory extends BaseTokenTestCase
*/
private static String tsToString(TokenStream in) throws IOException {
StringBuilder out = new StringBuilder();
TermAttribute termAtt = in.addAttribute(TermAttribute.class);
CharTermAttribute termAtt = in.addAttribute(CharTermAttribute.class);
// Extra safety: clear the attributes and assign a bogus term, to verify
// that no state is preserved between tokens.
in.clearAttributes();
termAtt.setTermBuffer("bogusTerm");
termAtt.setEmpty().append("bogusTerm");
while (in.incrementToken()) {
if (out.length() > 0)
out.append(' ');
out.append(termAtt.term());
out.append(termAtt.toString());
in.clearAttributes();
termAtt.setTermBuffer("bogusTerm");
termAtt.setEmpty().append("bogusTerm");
}
in.close();

View File

@ -21,7 +21,7 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.util.Iterator;
import java.util.Arrays;
@ -44,14 +44,14 @@ public class TestRemoveDuplicatesTokenFilter extends BaseTokenTestCase {
RemoveDuplicatesTokenFilterFactory factory = new RemoveDuplicatesTokenFilterFactory();
final TokenStream ts = factory.create
(new TokenStream() {
TermAttribute termAtt = addAttribute(TermAttribute.class);
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
public boolean incrementToken() {
if (toks.hasNext()) {
clearAttributes();
Token tok = toks.next();
termAtt.setTermBuffer(tok.term());
termAtt.setEmpty().append(tok.term());
offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
posIncAtt.setPositionIncrement(tok.getPositionIncrement());
return true;

View File

@ -25,7 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import java.io.IOException;
@ -381,7 +381,7 @@ public class TestSynonymFilter extends BaseTokenTestCase {
private static class IterTokenStream extends TokenStream {
final Token tokens[];
int index = 0;
TermAttribute termAtt = addAttribute(TermAttribute.class);
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
@ -403,7 +403,7 @@ public class TestSynonymFilter extends BaseTokenTestCase {
else {
clearAttributes();
Token token = tokens[index++];
termAtt.setTermBuffer(token.term());
termAtt.setEmpty().append(token.term());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
posIncAtt.setPositionIncrement(token.getPositionIncrement());
flagsAtt.setFlags(token.getFlags());

View File

@ -28,7 +28,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/**
@ -81,7 +81,7 @@ public class TestTrimFilter extends BaseTokenTestCase {
private static class IterTokenStream extends TokenStream {
final Token tokens[];
int index = 0;
TermAttribute termAtt = addAttribute(TermAttribute.class);
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
@ -103,7 +103,7 @@ public class TestTrimFilter extends BaseTokenTestCase {
else {
clearAttributes();
Token token = tokens[index++];
termAtt.setTermBuffer(token.term());
termAtt.setEmpty().append(token.term());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
posIncAtt.setPositionIncrement(token.getPositionIncrement());
flagsAtt.setFlags(token.getFlags());

View File

@ -28,7 +28,7 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;
@ -347,19 +347,17 @@ public class TestWordDelimiterFilter extends SolrTestCaseJ4 {
* Set a large position increment gap of 10 if the token is "largegap" or "/"
*/
private final class LargePosIncTokenFilter extends TokenFilter {
private TermAttribute termAtt;
private PositionIncrementAttribute posIncAtt;
private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
protected LargePosIncTokenFilter(TokenStream input) {
super(input);
termAtt = addAttribute(TermAttribute.class);
posIncAtt = addAttribute(PositionIncrementAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (termAtt.term().equals("largegap") || termAtt.term().equals("/"))
if (termAtt.toString().equals("largegap") || termAtt.toString().equals("/"))
posIncAtt.setPositionIncrement(10);
return true;
} else {

View File

@ -19,11 +19,11 @@ package org.apache.solr.spelling;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import java.util.Collection;
@ -43,18 +43,18 @@ class SimpleQueryConverter extends SpellingQueryConverter{
WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
TokenStream ts = analyzer.tokenStream("", new StringReader(origQuery));
// TODO: support custom attributes
TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
OffsetAttribute offsetAtt = (OffsetAttribute) ts.addAttribute(OffsetAttribute.class);
TypeAttribute typeAtt = (TypeAttribute) ts.addAttribute(TypeAttribute.class);
FlagsAttribute flagsAtt = (FlagsAttribute) ts.addAttribute(FlagsAttribute.class);
PayloadAttribute payloadAtt = (PayloadAttribute) ts.addAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts.addAttribute(PositionIncrementAttribute.class);
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
try {
ts.reset();
while (ts.incrementToken()){
Token tok = new Token();
tok.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
tok.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
tok.setFlags(flagsAtt.getFlags());
tok.setPayload(payloadAtt.getPayload());

View File

@ -213,17 +213,17 @@
final Iterator<Token> iter = tokens.iterator();
tstream = filtfac.create( new TokenStream() {
TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute (OffsetAttribute.class);
TypeAttribute typeAtt = (TypeAttribute) addAttribute (TypeAttribute.class);
FlagsAttribute flagsAtt = (FlagsAttribute) addAttribute (FlagsAttribute.class);
PayloadAttribute payloadAtt = (PayloadAttribute) addAttribute (PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) addAttribute (PositionIncrementAttribute.class);
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = addAttribute (OffsetAttribute.class);
TypeAttribute typeAtt = addAttribute (TypeAttribute.class);
FlagsAttribute flagsAtt = addAttribute (FlagsAttribute.class);
PayloadAttribute payloadAtt = addAttribute (PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = addAttribute (PositionIncrementAttribute.class);
public boolean incrementToken() throws IOException {
if (iter.hasNext()) {
Token token = iter.next();
termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
typeAtt.setType(token.type());
flagsAtt.setFlags(token.getFlags());
@ -255,19 +255,19 @@
static List<Token> getTokens(TokenStream tstream) throws IOException {
List<Token> tokens = new ArrayList<Token>();
TermAttribute termAtt = (TermAttribute) tstream.addAttribute(TermAttribute.class);
OffsetAttribute offsetAtt = (OffsetAttribute) tstream.addAttribute (OffsetAttribute.class);
TypeAttribute typeAtt = (TypeAttribute) tstream.addAttribute (TypeAttribute.class);
FlagsAttribute flagsAtt = (FlagsAttribute) tstream.addAttribute (FlagsAttribute.class);
PayloadAttribute payloadAtt = (PayloadAttribute) tstream.addAttribute (PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) tstream.addAttribute (PositionIncrementAttribute.class);
CharTermAttribute termAtt = tstream.addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = tstream.addAttribute (OffsetAttribute.class);
TypeAttribute typeAtt = tstream.addAttribute (TypeAttribute.class);
FlagsAttribute flagsAtt = tstream.addAttribute (FlagsAttribute.class);
PayloadAttribute payloadAtt = tstream.addAttribute (PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = tstream.addAttribute (PositionIncrementAttribute.class);
while (true) {
if (!tstream.incrementToken())
break;
else {
Token token = new Token();
token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
token.setType(typeAtt.type());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setPayload(payloadAtt.getPayload());