mirror of https://github.com/apache/lucene.git
SOLR-1876: convert all Solr tokenstreams to CharTermAttribute, make all non-final TokenStreams/Analyzers final
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@932862 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3ae65e769c
commit
9a43d0ee41
|
@ -59,7 +59,8 @@ Upgrading from Solr 1.4
|
|||
"terms" container is a map instead of a named list. This affects
|
||||
response formats like JSON, but not XML. (yonik)
|
||||
|
||||
|
||||
* SOLR-1876: All Analyzers and TokenStreams are now final to enforce
|
||||
the decorator pattern. (rmuir, uschindler)
|
||||
|
||||
Detailed Change List
|
||||
----------------------
|
||||
|
|
|
@ -455,6 +455,10 @@
|
|||
|
||||
<formatter classname="${junit.details.formatter}" usefile="false" if="junit.details"/>
|
||||
<classpath refid="test.run.classpath"/>
|
||||
<assertions>
|
||||
<enable package="org.apache.lucene"/>
|
||||
<enable package="org.apache.solr"/>
|
||||
</assertions>
|
||||
<formatter type="${junit.formatter}"/>
|
||||
<batchtest fork="yes" todir="${junit.output.dir}" if="runall">
|
||||
<fileset dir="src/test" includes="**/Test@{pattern}*.java,**/@{pattern}*Test.java"/>
|
||||
|
|
|
@ -139,6 +139,10 @@
|
|||
>
|
||||
<formatter type="brief" usefile="false" if="junit.details"/>
|
||||
<classpath refid="test.classpath"/>
|
||||
<assertions>
|
||||
<enable package="org.apache.lucene"/>
|
||||
<enable package="org.apache.solr"/>
|
||||
</assertions>
|
||||
<formatter type="xml"/>
|
||||
<batchtest fork="yes" todir="${junit.output.dir}" unless="testcase">
|
||||
<fileset dir="src/test/java" includes="${junit.includes}">
|
||||
|
|
|
@ -162,6 +162,10 @@
|
|||
>
|
||||
<formatter type="brief" usefile="false" if="junit.details"/>
|
||||
<classpath refid="test.extras.classpath"/>
|
||||
<assertions>
|
||||
<enable package="org.apache.lucene"/>
|
||||
<enable package="org.apache.solr"/>
|
||||
</assertions>
|
||||
<formatter type="xml"/>
|
||||
<batchtest fork="yes" todir="${junit.output.dir}" unless="testcase">
|
||||
<fileset dir="src/extras/test/java" includes="${junit.includes}"/>
|
||||
|
|
|
@ -88,6 +88,10 @@
|
|||
>
|
||||
<formatter type="brief" usefile="false" if="junit.details"/>
|
||||
<classpath refid="test.classpath"/>
|
||||
<assertions>
|
||||
<enable package="org.apache.lucene"/>
|
||||
<enable package="org.apache.solr"/>
|
||||
</assertions>
|
||||
<formatter type="xml"/>
|
||||
<batchtest fork="yes" todir="${junit.output.dir}" unless="testcase">
|
||||
<fileset dir="src/test/java" includes="${junit.includes}"/>
|
||||
|
|
|
@ -87,6 +87,10 @@
|
|||
<sysproperty key="java.util.logging.config.file" value="${common-solr.dir}/testlogging.properties"/>
|
||||
<formatter type="brief" usefile="false"/>
|
||||
<classpath refid="test.classpath"/>
|
||||
<assertions>
|
||||
<enable package="org.apache.lucene"/>
|
||||
<enable package="org.apache.solr"/>
|
||||
</assertions>
|
||||
<!--<formatter type="xml" usefile="false"/>-->
|
||||
<batchtest fork="yes" todir="${junit.output.dir}" unless="testcase">
|
||||
<fileset dir="src/test" includes="${junit.includes}"/>
|
||||
|
|
|
@ -20,11 +20,11 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.util.AttributeSource; // javadoc @link
|
||||
|
||||
|
@ -73,7 +73,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
|
|||
private final LinkedList<Token> inQueue = new LinkedList<Token>();
|
||||
private final LinkedList<Token> outQueue = new LinkedList<Token>();
|
||||
|
||||
private final TermAttribute termAtt = addAttribute(TermAttribute.class);
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
|
||||
private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
|
||||
|
@ -150,7 +150,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
|
|||
return null;
|
||||
} else {
|
||||
Token token = new Token();
|
||||
token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
|
||||
token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
|
||||
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
|
||||
token.setType(typeAtt.type());
|
||||
token.setFlags(flagsAtt.getFlags());
|
||||
|
@ -163,7 +163,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
|
|||
/** old api emulation for back compat */
|
||||
private boolean writeToken(Token token) throws IOException {
|
||||
clearAttributes();
|
||||
termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
|
||||
termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
|
||||
offsetAtt.setOffset(token.startOffset(), token.endOffset());
|
||||
typeAtt.setType(token.type());
|
||||
flagsAtt.setFlags(token.getFlags());
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
package org.apache.solr.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
@ -188,22 +188,21 @@ public class CapitalizationFilterFactory extends BaseTokenFilterFactory {
|
|||
* <p/>
|
||||
* This is package protected since it is not useful without the Factory
|
||||
*/
|
||||
class CapitalizationFilter extends TokenFilter {
|
||||
final class CapitalizationFilter extends TokenFilter {
|
||||
private final CapitalizationFilterFactory factory;
|
||||
private final TermAttribute termAtt;
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
|
||||
public CapitalizationFilter(TokenStream in, final CapitalizationFilterFactory factory) {
|
||||
super(in);
|
||||
this.factory = factory;
|
||||
this.termAtt = addAttribute(TermAttribute.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (!input.incrementToken()) return false;
|
||||
|
||||
char[] termBuffer = termAtt.termBuffer();
|
||||
int termBufferLength = termAtt.termLength();
|
||||
char[] termBuffer = termAtt.buffer();
|
||||
int termBufferLength = termAtt.length();
|
||||
char[] backup = null;
|
||||
if (factory.maxWordCount < CapitalizationFilterFactory.DEFAULT_MAX_WORD_COUNT) {
|
||||
//make a backup in case we exceed the word count
|
||||
|
@ -232,7 +231,7 @@ class CapitalizationFilter extends TokenFilter {
|
|||
}
|
||||
|
||||
if (wordCount > factory.maxWordCount) {
|
||||
termAtt.setTermBuffer(backup, 0, termBufferLength);
|
||||
termAtt.copyBuffer(backup, 0, termBufferLength);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ import org.apache.lucene.analysis.TokenFilter;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
|
@ -52,7 +52,7 @@ public final class CommonGramsFilter extends TokenFilter {
|
|||
|
||||
private final StringBuilder buffer = new StringBuilder();
|
||||
|
||||
private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
|
||||
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
|
||||
private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
|
||||
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
|
||||
|
@ -231,7 +231,7 @@ public final class CommonGramsFilter extends TokenFilter {
|
|||
* @return {@code true} if the current token is a common term, {@code false} otherwise
|
||||
*/
|
||||
private boolean isCommon() {
|
||||
return commonWords != null && commonWords.contains(termAttribute.termBuffer(), 0, termAttribute.termLength());
|
||||
return commonWords != null && commonWords.contains(termAttribute.buffer(), 0, termAttribute.length());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -239,7 +239,7 @@ public final class CommonGramsFilter extends TokenFilter {
|
|||
*/
|
||||
private void saveTermBuffer() {
|
||||
buffer.setLength(0);
|
||||
buffer.append(termAttribute.termBuffer(), 0, termAttribute.termLength());
|
||||
buffer.append(termAttribute.buffer(), 0, termAttribute.length());
|
||||
buffer.append(SEPARATOR);
|
||||
lastStartOffset = offsetAttribute.startOffset();
|
||||
lastWasCommon = isCommon();
|
||||
|
@ -249,19 +249,19 @@ public final class CommonGramsFilter extends TokenFilter {
|
|||
* Constructs a compound token.
|
||||
*/
|
||||
private void gramToken() {
|
||||
buffer.append(termAttribute.termBuffer(), 0, termAttribute.termLength());
|
||||
buffer.append(termAttribute.buffer(), 0, termAttribute.length());
|
||||
int endOffset = offsetAttribute.endOffset();
|
||||
|
||||
clearAttributes();
|
||||
|
||||
int length = buffer.length();
|
||||
char termText[] = termAttribute.termBuffer();
|
||||
char termText[] = termAttribute.buffer();
|
||||
if (length > termText.length) {
|
||||
termText = termAttribute.resizeTermBuffer(length);
|
||||
termText = termAttribute.resizeBuffer(length);
|
||||
}
|
||||
|
||||
buffer.getChars(0, length, termText, 0);
|
||||
termAttribute.setTermLength(length);
|
||||
termAttribute.setLength(length);
|
||||
posIncAttribute.setPositionIncrement(0);
|
||||
offsetAttribute.setOffset(lastStartOffset, endOffset);
|
||||
typeAttribute.setType(GRAM_TYPE);
|
||||
|
|
|
@ -22,25 +22,23 @@ import java.util.LinkedList;
|
|||
import org.apache.commons.codec.language.DoubleMetaphone;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
||||
public class DoubleMetaphoneFilter extends TokenFilter {
|
||||
public final class DoubleMetaphoneFilter extends TokenFilter {
|
||||
|
||||
private static final String TOKEN_TYPE = "DoubleMetaphone";
|
||||
|
||||
private final LinkedList<State> remainingTokens = new LinkedList<State>();
|
||||
private final DoubleMetaphone encoder = new DoubleMetaphone();
|
||||
private final boolean inject;
|
||||
private final TermAttribute termAtt;
|
||||
private final PositionIncrementAttribute posAtt;
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
protected DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
|
||||
super(input);
|
||||
this.encoder.setMaxCodeLen(maxCodeLength);
|
||||
this.inject = inject;
|
||||
this.termAtt = addAttribute(TermAttribute.class);
|
||||
this.posAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -55,12 +53,12 @@ public class DoubleMetaphoneFilter extends TokenFilter {
|
|||
|
||||
if (!input.incrementToken()) return false;
|
||||
|
||||
int len = termAtt.termLength();
|
||||
int len = termAtt.length();
|
||||
if (len==0) return true; // pass through zero length terms
|
||||
|
||||
int firstAlternativeIncrement = inject ? 0 : posAtt.getPositionIncrement();
|
||||
|
||||
String v = new String(termAtt.termBuffer(), 0, len);
|
||||
String v = termAtt.toString();
|
||||
String primaryPhoneticValue = encoder.doubleMetaphone(v);
|
||||
String alternatePhoneticValue = encoder.doubleMetaphone(v, true);
|
||||
|
||||
|
@ -74,7 +72,7 @@ public class DoubleMetaphoneFilter extends TokenFilter {
|
|||
}
|
||||
posAtt.setPositionIncrement( firstAlternativeIncrement );
|
||||
firstAlternativeIncrement = 0;
|
||||
termAtt.setTermBuffer(primaryPhoneticValue);
|
||||
termAtt.setEmpty().append(primaryPhoneticValue);
|
||||
saveState = true;
|
||||
}
|
||||
|
||||
|
@ -86,7 +84,7 @@ public class DoubleMetaphoneFilter extends TokenFilter {
|
|||
saveState = false;
|
||||
}
|
||||
posAtt.setPositionIncrement( firstAlternativeIncrement );
|
||||
termAtt.setTermBuffer(alternatePhoneticValue);
|
||||
termAtt.setEmpty().append(alternatePhoneticValue);
|
||||
saveState = true;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
/**
|
||||
* When the plain text is extracted from documents, we will often have many words hyphenated and broken into
|
||||
|
@ -54,7 +54,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
|||
*/
|
||||
public final class HyphenatedWordsFilter extends TokenFilter {
|
||||
|
||||
private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
|
||||
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
|
||||
|
||||
private final StringBuilder hyphenated = new StringBuilder();
|
||||
|
@ -75,8 +75,8 @@ public final class HyphenatedWordsFilter extends TokenFilter {
|
|||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
while (input.incrementToken()) {
|
||||
char[] term = termAttribute.termBuffer();
|
||||
int termLength = termAttribute.termLength();
|
||||
char[] term = termAttribute.buffer();
|
||||
int termLength = termAttribute.length();
|
||||
|
||||
if (termLength > 0 && term[termLength - 1] == '-') {
|
||||
// a hyphenated word
|
||||
|
@ -128,14 +128,14 @@ public final class HyphenatedWordsFilter extends TokenFilter {
|
|||
restoreState(savedState);
|
||||
savedState = null;
|
||||
|
||||
char term[] = termAttribute.termBuffer();
|
||||
char term[] = termAttribute.buffer();
|
||||
int length = hyphenated.length();
|
||||
if (length > termAttribute.termLength()) {
|
||||
term = termAttribute.resizeTermBuffer(length);
|
||||
if (length > termAttribute.length()) {
|
||||
term = termAttribute.resizeBuffer(length);
|
||||
}
|
||||
|
||||
hyphenated.getChars(0, length, term, 0);
|
||||
termAttribute.setTermLength(length);
|
||||
termAttribute.setLength(length);
|
||||
offsetAttribute.setOffset(offsetAttribute.startOffset(), endOffset);
|
||||
hyphenated.setLength(0);
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
@ -34,7 +34,7 @@ import java.util.Set;
|
|||
*/
|
||||
public final class KeepWordFilter extends TokenFilter {
|
||||
private final CharArraySet words;
|
||||
private final TermAttribute termAtt;
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
|
||||
/** @deprecated Use {@link #KeepWordFilter(TokenStream, Set, boolean)} instead */
|
||||
@Deprecated
|
||||
|
@ -47,13 +47,12 @@ public final class KeepWordFilter extends TokenFilter {
|
|||
public KeepWordFilter(TokenStream in, CharArraySet words) {
|
||||
super(in);
|
||||
this.words = words;
|
||||
this.termAtt = addAttribute(TermAttribute.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
while (input.incrementToken()) {
|
||||
if (words.contains(termAtt.termBuffer(), 0, termAtt.termLength())) return true;
|
||||
if (words.contains(termAtt.buffer(), 0, termAtt.length())) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ public final class PatternReplaceFilter extends TokenFilter {
|
|||
private final Pattern p;
|
||||
private final String replacement;
|
||||
private final boolean all;
|
||||
private final CharTermAttribute termAtt;
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final Matcher m;
|
||||
|
||||
/**
|
||||
|
@ -64,7 +64,6 @@ public final class PatternReplaceFilter extends TokenFilter {
|
|||
this.p=p;
|
||||
this.replacement = (null == replacement) ? "" : replacement;
|
||||
this.all=all;
|
||||
this.termAtt = addAttribute(CharTermAttribute.class);
|
||||
this.m = p.matcher(termAtt);
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.io.Reader;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
|
@ -56,7 +56,7 @@ import org.apache.commons.io.IOUtils;
|
|||
*/
|
||||
public final class PatternTokenizer extends Tokenizer {
|
||||
|
||||
private final TermAttribute termAtt = addAttribute(TermAttribute.class);
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
|
||||
private String str;
|
||||
|
@ -86,7 +86,7 @@ public final class PatternTokenizer extends Tokenizer {
|
|||
while (matcher.find()) {
|
||||
final String match = matcher.group(group);
|
||||
if (match.length() == 0) continue;
|
||||
termAtt.setTermBuffer(match);
|
||||
termAtt.setEmpty().append(match);
|
||||
index = matcher.start(group);
|
||||
offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.end(group)));
|
||||
return true;
|
||||
|
@ -101,7 +101,7 @@ public final class PatternTokenizer extends Tokenizer {
|
|||
while (matcher.find()) {
|
||||
if (matcher.start() - index > 0) {
|
||||
// found a non-zero-length token
|
||||
termAtt.setTermBuffer(str, index, matcher.start() - index);
|
||||
termAtt.setEmpty().append(str, index, matcher.start());
|
||||
offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.start()));
|
||||
index = matcher.end();
|
||||
return true;
|
||||
|
@ -115,7 +115,7 @@ public final class PatternTokenizer extends Tokenizer {
|
|||
return false;
|
||||
}
|
||||
|
||||
termAtt.setTermBuffer(str, index, str.length() - index);
|
||||
termAtt.setEmpty().append(str, index, str.length());
|
||||
offsetAtt.setOffset(correctOffset(index), correctOffset(str.length()));
|
||||
index = Integer.MAX_VALUE; // mark exhausted
|
||||
return true;
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.solr.analysis;
|
|||
import org.apache.commons.codec.Encoder;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -31,23 +31,21 @@ import java.io.IOException;
|
|||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public class PhoneticFilter extends TokenFilter
|
||||
public final class PhoneticFilter extends TokenFilter
|
||||
{
|
||||
protected boolean inject = true;
|
||||
protected Encoder encoder = null;
|
||||
protected String name = null;
|
||||
|
||||
protected State save = null;
|
||||
private final TermAttribute termAtt;
|
||||
private final PositionIncrementAttribute posAtt;
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
public PhoneticFilter(TokenStream in, Encoder encoder, String name, boolean inject) {
|
||||
super(in);
|
||||
this.encoder = encoder;
|
||||
this.name = name;
|
||||
this.inject = inject;
|
||||
this.termAtt = addAttribute(TermAttribute.class);
|
||||
this.posAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
this.inject = inject;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -62,9 +60,9 @@ public class PhoneticFilter extends TokenFilter
|
|||
if (!input.incrementToken()) return false;
|
||||
|
||||
// pass through zero-length terms
|
||||
if (termAtt.termLength()==0) return true;
|
||||
if (termAtt.length() == 0) return true;
|
||||
|
||||
String value = termAtt.term();
|
||||
String value = termAtt.toString();
|
||||
String phonetic = null;
|
||||
try {
|
||||
String v = encoder.encode(value).toString();
|
||||
|
@ -75,7 +73,7 @@ public class PhoneticFilter extends TokenFilter
|
|||
|
||||
if (!inject) {
|
||||
// just modify this token
|
||||
termAtt.setTermBuffer(phonetic);
|
||||
termAtt.setEmpty().append(phonetic);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -88,7 +86,7 @@ public class PhoneticFilter extends TokenFilter
|
|||
save = captureState();
|
||||
|
||||
posAtt.setPositionIncrement(origOffset);
|
||||
termAtt.setTermBuffer(phonetic);
|
||||
termAtt.setEmpty().append(phonetic);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -20,8 +20,8 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -31,7 +31,7 @@ import java.io.IOException;
|
|||
*/
|
||||
public final class RemoveDuplicatesTokenFilter extends TokenFilter {
|
||||
|
||||
private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
|
||||
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
|
||||
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
// use a fixed version, as we don't care about case sensitivity.
|
||||
|
@ -52,8 +52,8 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter {
|
|||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
while (input.incrementToken()) {
|
||||
final char term[] = termAttribute.termBuffer();
|
||||
final int length = termAttribute.termLength();
|
||||
final char term[] = termAttribute.buffer();
|
||||
final int length = termAttribute.length();
|
||||
final int posIncrement = posIncAttribute.getPositionIncrement();
|
||||
|
||||
if (posIncrement > 0) {
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
/**
|
||||
* This class produces a special form of reversed tokens, suitable for
|
||||
|
@ -35,17 +35,17 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
|||
* <code>withOriginal == true</code>, which proportionally increases the size
|
||||
* of postings and term dictionary in the index.
|
||||
*/
|
||||
public class ReversedWildcardFilter extends TokenFilter {
|
||||
public final class ReversedWildcardFilter extends TokenFilter {
|
||||
|
||||
private boolean withOriginal;
|
||||
private char markerChar;
|
||||
private State save;
|
||||
private TermAttribute termAtt;
|
||||
private CharTermAttribute termAtt;
|
||||
private PositionIncrementAttribute posAtt;
|
||||
|
||||
protected ReversedWildcardFilter(TokenStream input, boolean withOriginal, char markerChar) {
|
||||
super(input);
|
||||
this.termAtt = addAttribute(TermAttribute.class);
|
||||
this.termAtt = addAttribute(CharTermAttribute.class);
|
||||
this.posAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
this.withOriginal = withOriginal;
|
||||
this.markerChar = markerChar;
|
||||
|
@ -63,19 +63,19 @@ public class ReversedWildcardFilter extends TokenFilter {
|
|||
if (!input.incrementToken()) return false;
|
||||
|
||||
// pass through zero-length terms
|
||||
int oldLen = termAtt.termLength();
|
||||
int oldLen = termAtt.length();
|
||||
if (oldLen ==0) return true;
|
||||
int origOffset = posAtt.getPositionIncrement();
|
||||
if (withOriginal == true){
|
||||
posAtt.setPositionIncrement(0);
|
||||
save = captureState();
|
||||
}
|
||||
char [] buffer = termAtt.resizeTermBuffer(oldLen + 1);
|
||||
char [] buffer = termAtt.resizeBuffer(oldLen + 1);
|
||||
buffer[oldLen] = markerChar;
|
||||
reverse(buffer, 0, oldLen + 1);
|
||||
|
||||
posAtt.setPositionIncrement(origOffset);
|
||||
termAtt.setTermBuffer(buffer, 0, oldLen +1);
|
||||
termAtt.copyBuffer(buffer, 0, oldLen +1);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
|
||||
|
@ -41,7 +41,7 @@ import java.util.LinkedList;
|
|||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public class SynonymFilter extends TokenFilter {
|
||||
public final class SynonymFilter extends TokenFilter {
|
||||
|
||||
private final SynonymMap map; // Map<String, SynonymMap>
|
||||
private Iterator<AttributeSource> replacement; // iterator over generated tokens
|
||||
|
@ -50,7 +50,7 @@ public class SynonymFilter extends TokenFilter {
|
|||
super(in);
|
||||
this.map = map;
|
||||
// just ensuring these attributes exist...
|
||||
addAttribute(TermAttribute.class);
|
||||
addAttribute(CharTermAttribute.class);
|
||||
addAttribute(PositionIncrementAttribute.class);
|
||||
addAttribute(OffsetAttribute.class);
|
||||
addAttribute(TypeAttribute.class);
|
||||
|
@ -87,8 +87,8 @@ public class SynonymFilter extends TokenFilter {
|
|||
// common case fast-path of first token not matching anything
|
||||
AttributeSource firstTok = nextTok();
|
||||
if (firstTok == null) return false;
|
||||
TermAttribute termAtt = firstTok.addAttribute(TermAttribute.class);
|
||||
SynonymMap result = map.submap!=null ? map.submap.get(termAtt.termBuffer(), 0, termAtt.termLength()) : null;
|
||||
CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
|
||||
SynonymMap result = map.submap!=null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;
|
||||
if (result == null) {
|
||||
copy(this, firstTok);
|
||||
return true;
|
||||
|
@ -128,14 +128,14 @@ public class SynonymFilter extends TokenFilter {
|
|||
for (int i=0; i<result.synonyms.length; i++) {
|
||||
Token repTok = result.synonyms[i];
|
||||
AttributeSource newTok = firstTok.cloneAttributes();
|
||||
TermAttribute newTermAtt = newTok.addAttribute(TermAttribute.class);
|
||||
CharTermAttribute newTermAtt = newTok.addAttribute(CharTermAttribute.class);
|
||||
OffsetAttribute newOffsetAtt = newTok.addAttribute(OffsetAttribute.class);
|
||||
PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class);
|
||||
|
||||
newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
|
||||
newTermAtt.setTermBuffer(repTok.termBuffer(), 0, repTok.termLength());
|
||||
newTermAtt.copyBuffer(repTok.termBuffer(), 0, repTok.termLength());
|
||||
repPos += repTok.getPositionIncrement();
|
||||
if (i==0) repPos=origPos; // make position of first token equal to original
|
||||
|
||||
|
@ -215,8 +215,8 @@ public class SynonymFilter extends TokenFilter {
|
|||
if (tok == this)
|
||||
tok = cloneAttributes();
|
||||
// check for positionIncrement!=1? if>1, should not match, if==0, check multiple at this level?
|
||||
TermAttribute termAtt = tok.getAttribute(TermAttribute.class);
|
||||
SynonymMap subMap = map.submap.get(termAtt.termBuffer(), 0, termAtt.termLength());
|
||||
CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
|
||||
SynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());
|
||||
|
||||
if (subMap != null) {
|
||||
// recurse
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
package org.apache.solr.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.solr.common.ResourceLoader;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.util.plugin.ResourceLoaderAware;
|
||||
|
@ -135,11 +135,10 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
|
|||
TokenStream ts = loadTokenizer(tokFactory, reader);
|
||||
List<String> tokList = new ArrayList<String>();
|
||||
try {
|
||||
TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
|
||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||
while (ts.incrementToken()){
|
||||
String text = new String(termAtt.termBuffer(), 0, termAtt.termLength());
|
||||
if( text.length() > 0 )
|
||||
tokList.add( text );
|
||||
if( termAtt.length() > 0 )
|
||||
tokList.add( termAtt.toString() );
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
|
|
|
@ -32,7 +32,7 @@ import java.io.Reader;
|
|||
// An analyzer that uses a tokenizer and a list of token filters to
|
||||
// create a TokenStream.
|
||||
//
|
||||
public class TokenizerChain extends SolrAnalyzer {
|
||||
public final class TokenizerChain extends SolrAnalyzer {
|
||||
final private CharFilterFactory[] charFilters;
|
||||
final private TokenizerFactory tokenizer;
|
||||
final private TokenFilterFactory[] filters;
|
||||
|
|
|
@ -51,7 +51,7 @@ public class TrieTokenizerFactory extends BaseTokenizerFactory {
|
|||
}
|
||||
}
|
||||
|
||||
class TrieTokenizer extends Tokenizer {
|
||||
final class TrieTokenizer extends Tokenizer {
|
||||
protected static final DateField dateField = new DateField();
|
||||
protected final int precisionStep;
|
||||
protected final TrieTypes type;
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.solr.analysis;
|
|||
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -32,24 +32,21 @@ import java.io.IOException;
|
|||
public final class TrimFilter extends TokenFilter {
|
||||
|
||||
final boolean updateOffsets;
|
||||
private final TermAttribute termAtt;
|
||||
private final OffsetAttribute offsetAtt;
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
|
||||
|
||||
public TrimFilter(TokenStream in, boolean updateOffsets) {
|
||||
super(in);
|
||||
this.updateOffsets = updateOffsets;
|
||||
|
||||
this.termAtt = addAttribute(TermAttribute.class);
|
||||
this.offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (!input.incrementToken()) return false;
|
||||
|
||||
char[] termBuffer = termAtt.termBuffer();
|
||||
int len = termAtt.termLength();
|
||||
char[] termBuffer = termAtt.buffer();
|
||||
int len = termAtt.length();
|
||||
//TODO: Is this the right behavior or should we return false? Currently, " ", returns true, so I think this should
|
||||
//also return true
|
||||
if (len == 0){
|
||||
|
@ -69,9 +66,9 @@ public final class TrimFilter extends TokenFilter {
|
|||
}
|
||||
if (start > 0 || end < len) {
|
||||
if (start < end) {
|
||||
termAtt.setTermBuffer(termBuffer, start, (end - start));
|
||||
termAtt.copyBuffer(termBuffer, start, (end - start));
|
||||
} else {
|
||||
termAtt.setTermLength(0);
|
||||
termAtt.setEmpty();
|
||||
}
|
||||
if (updateOffsets) {
|
||||
int newStart = offsetAtt.startOffset()+start;
|
||||
|
|
|
@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
@ -120,7 +120,7 @@ final class WordDelimiterFilter extends TokenFilter {
|
|||
*/
|
||||
final CharArraySet protWords;
|
||||
|
||||
private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
|
||||
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
|
||||
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
|
||||
private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
|
||||
|
@ -338,8 +338,8 @@ final class WordDelimiterFilter extends TokenFilter {
|
|||
return false;
|
||||
}
|
||||
|
||||
int termLength = termAttribute.termLength();
|
||||
char[] termBuffer = termAttribute.termBuffer();
|
||||
int termLength = termAttribute.length();
|
||||
char[] termBuffer = termAttribute.buffer();
|
||||
|
||||
accumPosInc += posIncAttribute.getPositionIncrement();
|
||||
|
||||
|
@ -462,14 +462,14 @@ final class WordDelimiterFilter extends TokenFilter {
|
|||
savedStartOffset = offsetAttribute.startOffset();
|
||||
savedEndOffset = offsetAttribute.endOffset();
|
||||
// if length by start + end offsets doesn't match the term text then assume this is a synonym and don't adjust the offsets.
|
||||
hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.termLength());
|
||||
hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.length());
|
||||
savedType = typeAttribute.type();
|
||||
|
||||
if (savedBuffer.length < termAttribute.termLength()) {
|
||||
savedBuffer = new char[ArrayUtil.oversize(termAttribute.termLength(), RamUsageEstimator.NUM_BYTES_CHAR)];
|
||||
if (savedBuffer.length < termAttribute.length()) {
|
||||
savedBuffer = new char[ArrayUtil.oversize(termAttribute.length(), RamUsageEstimator.NUM_BYTES_CHAR)];
|
||||
}
|
||||
|
||||
System.arraycopy(termAttribute.termBuffer(), 0, savedBuffer, 0, termAttribute.termLength());
|
||||
System.arraycopy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length());
|
||||
iterator.text = savedBuffer;
|
||||
|
||||
hasSavedState = true;
|
||||
|
@ -531,7 +531,7 @@ final class WordDelimiterFilter extends TokenFilter {
|
|||
*/
|
||||
private void generatePart(boolean isSingleWord) {
|
||||
clearAttributes();
|
||||
termAttribute.setTermBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
|
||||
termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
|
||||
|
||||
int startOffSet = (isSingleWord || !hasIllegalOffsets) ? savedStartOffset + iterator.current : savedStartOffset;
|
||||
int endOffSet = (hasIllegalOffsets) ? savedEndOffset : savedStartOffset + iterator.end;
|
||||
|
@ -636,13 +636,13 @@ final class WordDelimiterFilter extends TokenFilter {
|
|||
*/
|
||||
void write() {
|
||||
clearAttributes();
|
||||
if (termAttribute.termLength() < buffer.length()) {
|
||||
termAttribute.resizeTermBuffer(buffer.length());
|
||||
if (termAttribute.length() < buffer.length()) {
|
||||
termAttribute.resizeBuffer(buffer.length());
|
||||
}
|
||||
char termbuffer[] = termAttribute.termBuffer();
|
||||
char termbuffer[] = termAttribute.buffer();
|
||||
|
||||
buffer.getChars(0, buffer.length(), termbuffer, 0);
|
||||
termAttribute.setTermLength(buffer.length());
|
||||
termAttribute.setLength(buffer.length());
|
||||
|
||||
if (hasIllegalOffsets) {
|
||||
offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.solr.handler;
|
|||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -135,10 +134,10 @@ public class AnalysisRequestHandler extends RequestHandlerBase {
|
|||
// outer is namedList since order of tokens is important
|
||||
NamedList<NamedList<Object>> tokens = new NamedList<NamedList<Object>>();
|
||||
// TODO: support custom attributes
|
||||
TermAttribute termAtt = null;
|
||||
CharTermAttribute termAtt = null;
|
||||
TermToBytesRefAttribute bytesAtt = null;
|
||||
if (tstream.hasAttribute(TermAttribute.class)) {
|
||||
termAtt = tstream.getAttribute(TermAttribute.class);
|
||||
if (tstream.hasAttribute(CharTermAttribute.class)) {
|
||||
termAtt = tstream.getAttribute(CharTermAttribute.class);
|
||||
} else if (tstream.hasAttribute(TermToBytesRefAttribute.class)) {
|
||||
bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
|
||||
}
|
||||
|
@ -151,7 +150,7 @@ public class AnalysisRequestHandler extends RequestHandlerBase {
|
|||
NamedList<Object> token = new SimpleOrderedMap<Object>();
|
||||
tokens.add("token", token);
|
||||
if (termAtt != null) {
|
||||
token.add("value", termAtt.term());
|
||||
token.add("value", termAtt.toString());
|
||||
}
|
||||
if (bytesAtt != null) {
|
||||
bytesAtt.toBytesRef(bytes);
|
||||
|
|
|
@ -145,10 +145,10 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
List<Token> tokens = new ArrayList<Token>();
|
||||
|
||||
// TODO change this API to support custom attributes
|
||||
TermAttribute termAtt = null;
|
||||
CharTermAttribute termAtt = null;
|
||||
TermToBytesRefAttribute bytesAtt = null;
|
||||
if (tokenStream.hasAttribute(TermAttribute.class)) {
|
||||
termAtt = tokenStream.getAttribute(TermAttribute.class);
|
||||
if (tokenStream.hasAttribute(CharTermAttribute.class)) {
|
||||
termAtt = tokenStream.getAttribute(CharTermAttribute.class);
|
||||
} else if (tokenStream.hasAttribute(TermToBytesRefAttribute.class)) {
|
||||
bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
|
||||
}
|
||||
|
@ -163,7 +163,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
while (tokenStream.incrementToken()) {
|
||||
Token token = new Token();
|
||||
if (termAtt != null) {
|
||||
token.setTermBuffer(termAtt.term());
|
||||
token.setTermBuffer(termAtt.toString());
|
||||
}
|
||||
if (bytesAtt != null) {
|
||||
bytesAtt.toBytesRef(bytes);
|
||||
|
@ -259,12 +259,12 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
* TokenStream that iterates over a list of pre-existing Tokens
|
||||
*/
|
||||
// TODO refactor to support custom attributes
|
||||
protected static class ListBasedTokenStream extends TokenStream {
|
||||
protected final static class ListBasedTokenStream extends TokenStream {
|
||||
private final List<Token> tokens;
|
||||
private Iterator<Token> tokenIterator;
|
||||
|
||||
private final TermAttribute termAtt = (TermAttribute)
|
||||
addAttribute(TermAttribute.class);
|
||||
private final CharTermAttribute termAtt = (CharTermAttribute)
|
||||
addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = (OffsetAttribute)
|
||||
addAttribute(OffsetAttribute.class);
|
||||
private final TypeAttribute typeAtt = (TypeAttribute)
|
||||
|
@ -292,7 +292,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
|||
public boolean incrementToken() throws IOException {
|
||||
if (tokenIterator.hasNext()) {
|
||||
Token next = tokenIterator.next();
|
||||
termAtt.setTermBuffer(next.termBuffer(), 0, next.termLength());
|
||||
termAtt.copyBuffer(next.termBuffer(), 0, next.termLength());
|
||||
typeAtt.setType(next.type());
|
||||
offsetAtt.setOffset(next.startOffset(), next.endOffset());
|
||||
flagsAtt.setFlags(next.getFlags());
|
||||
|
|
|
@ -39,7 +39,7 @@ import javax.xml.xpath.XPathFactory;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.*;
|
||||
|
@ -298,9 +298,9 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
|
|||
TokenStream tokens = analyzer.reusableTokenStream( "", new StringReader( query ) );
|
||||
tokens.reset();
|
||||
|
||||
TermAttribute termAtt = (TermAttribute) tokens.addAttribute(TermAttribute.class);
|
||||
CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class);
|
||||
while( tokens.incrementToken() ) {
|
||||
norm.append( termAtt.termBuffer(), 0, termAtt.termLength() );
|
||||
norm.append( termAtt.buffer(), 0, termAtt.length() );
|
||||
}
|
||||
return norm.toString();
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -373,16 +373,16 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
|||
TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
|
||||
ts.reset();
|
||||
// TODO: support custom attributes
|
||||
TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
|
||||
OffsetAttribute offsetAtt = (OffsetAttribute) ts.addAttribute(OffsetAttribute.class);
|
||||
TypeAttribute typeAtt = (TypeAttribute) ts.addAttribute(TypeAttribute.class);
|
||||
FlagsAttribute flagsAtt = (FlagsAttribute) ts.addAttribute(FlagsAttribute.class);
|
||||
PayloadAttribute payloadAtt = (PayloadAttribute) ts.addAttribute(PayloadAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts.addAttribute(PositionIncrementAttribute.class);
|
||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
||||
TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
|
||||
FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
|
||||
PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
while (ts.incrementToken()){
|
||||
Token token = new Token();
|
||||
token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
|
||||
token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
|
||||
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
|
||||
token.setType(typeAtt.type());
|
||||
token.setFlags(flagsAtt.getFlags());
|
||||
|
|
|
@ -512,7 +512,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
* This is meant to work around fickleness in the highlighter only. It
|
||||
* can mess up token positions and should not be used for indexing or querying.
|
||||
*/
|
||||
class TokenOrderingFilter extends TokenFilter {
|
||||
final class TokenOrderingFilter extends TokenFilter {
|
||||
private final int windowSize;
|
||||
private final LinkedList<OrderedToken> queue = new LinkedList<OrderedToken>();
|
||||
private boolean done=false;
|
||||
|
@ -586,7 +586,7 @@ class TermOffsetsTokenStream {
|
|||
return new MultiValuedStream(length);
|
||||
}
|
||||
|
||||
class MultiValuedStream extends TokenStream {
|
||||
final class MultiValuedStream extends TokenStream {
|
||||
private final int length;
|
||||
OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
|
||||
|
||||
|
|
|
@ -20,11 +20,9 @@ package org.apache.solr.schema;
|
|||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.solr.search.function.ValueSource;
|
||||
import org.apache.solr.search.function.OrdFieldSource;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.response.XMLWriter;
|
||||
|
@ -59,7 +57,7 @@ public class BoolField extends FieldType {
|
|||
protected final static Analyzer boolAnalyzer = new SolrAnalyzer() {
|
||||
public TokenStreamInfo getStream(String fieldName, Reader reader) {
|
||||
Tokenizer tokenizer = new Tokenizer(reader) {
|
||||
final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
|
||||
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
boolean done = false;
|
||||
|
||||
@Override
|
||||
|
@ -75,7 +73,7 @@ public class BoolField extends FieldType {
|
|||
done = true;
|
||||
int ch = input.read();
|
||||
if (ch==-1) return false;
|
||||
termAtt.setTermBuffer(
|
||||
termAtt.copyBuffer(
|
||||
((ch=='t' || ch=='T' || ch=='1') ? TRUE_TOKEN : FALSE_TOKEN)
|
||||
,0,1);
|
||||
return true;
|
||||
|
|
|
@ -21,7 +21,7 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
@ -369,7 +369,7 @@ public abstract class FieldType extends FieldProperties {
|
|||
* Default analyzer for types that only produce 1 verbatim token...
|
||||
* A maximum size of chars to be read must be specified
|
||||
*/
|
||||
protected class DefaultAnalyzer extends SolrAnalyzer {
|
||||
protected final class DefaultAnalyzer extends SolrAnalyzer {
|
||||
final int maxChars;
|
||||
|
||||
DefaultAnalyzer(int maxChars) {
|
||||
|
@ -379,15 +379,15 @@ public abstract class FieldType extends FieldProperties {
|
|||
public TokenStreamInfo getStream(String fieldName, Reader reader) {
|
||||
Tokenizer ts = new Tokenizer(reader) {
|
||||
final char[] cbuf = new char[maxChars];
|
||||
final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
|
||||
final OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
|
||||
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
clearAttributes();
|
||||
int n = input.read(cbuf,0,maxChars);
|
||||
if (n<=0) return false;
|
||||
String s = toInternal(new String(cbuf,0,n));
|
||||
termAtt.setTermBuffer(s);
|
||||
termAtt.setEmpty().append(s);
|
||||
offsetAtt.setOffset(correctOffset(0),correctOffset(n));
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.search.MultiPhraseQuery;
|
|||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.CachingTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
@ -88,7 +88,7 @@ public class TextField extends FieldType {
|
|||
source = analyzer.tokenStream(field, new StringReader(queryText));
|
||||
}
|
||||
CachingTokenFilter buffer = new CachingTokenFilter(source);
|
||||
TermAttribute termAtt = null;
|
||||
CharTermAttribute termAtt = null;
|
||||
PositionIncrementAttribute posIncrAtt = null;
|
||||
int numTokens = 0;
|
||||
|
||||
|
@ -100,11 +100,11 @@ public class TextField extends FieldType {
|
|||
// success==false if we hit an exception
|
||||
}
|
||||
if (success) {
|
||||
if (buffer.hasAttribute(TermAttribute.class)) {
|
||||
termAtt = (TermAttribute) buffer.getAttribute(TermAttribute.class);
|
||||
if (buffer.hasAttribute(CharTermAttribute.class)) {
|
||||
termAtt = buffer.getAttribute(CharTermAttribute.class);
|
||||
}
|
||||
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
|
||||
posIncrAtt = (PositionIncrementAttribute) buffer.getAttribute(PositionIncrementAttribute.class);
|
||||
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -147,7 +147,7 @@ public class TextField extends FieldType {
|
|||
try {
|
||||
boolean hasNext = buffer.incrementToken();
|
||||
assert hasNext == true;
|
||||
term = termAtt.term();
|
||||
term = termAtt.toString();
|
||||
} catch (IOException e) {
|
||||
// safe to ignore, because we know the number of tokens
|
||||
}
|
||||
|
@ -164,7 +164,7 @@ public class TextField extends FieldType {
|
|||
try {
|
||||
boolean hasNext = buffer.incrementToken();
|
||||
assert hasNext == true;
|
||||
term = termAtt.term();
|
||||
term = termAtt.toString();
|
||||
} catch (IOException e) {
|
||||
// safe to ignore, because we know the number of tokens
|
||||
}
|
||||
|
@ -188,7 +188,7 @@ public class TextField extends FieldType {
|
|||
try {
|
||||
boolean hasNext = buffer.incrementToken();
|
||||
assert hasNext == true;
|
||||
term = termAtt.term();
|
||||
term = termAtt.toString();
|
||||
if (posIncrAtt != null) {
|
||||
positionIncrement = posIncrAtt.getPositionIncrement();
|
||||
}
|
||||
|
@ -229,7 +229,7 @@ public class TextField extends FieldType {
|
|||
try {
|
||||
boolean hasNext = buffer.incrementToken();
|
||||
assert hasNext == true;
|
||||
term = termAtt.term();
|
||||
term = termAtt.toString();
|
||||
if (posIncrAtt != null) {
|
||||
positionIncrement = posIncrAtt.getPositionIncrement();
|
||||
}
|
||||
|
|
|
@ -1043,7 +1043,7 @@ class ExtendedDismaxQParser extends QParser {
|
|||
}
|
||||
|
||||
|
||||
class ExtendedAnalyzer extends Analyzer {
|
||||
final class ExtendedAnalyzer extends Analyzer {
|
||||
final Map<String, Analyzer> map = new HashMap<String, Analyzer>();
|
||||
final QParser parser;
|
||||
final Analyzer queryAnalyzer;
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
|
||||
|
||||
|
@ -105,15 +105,15 @@ public class SpellingQueryConverter extends QueryConverter {
|
|||
try {
|
||||
stream = analyzer.reusableTokenStream("", new StringReader(word));
|
||||
// TODO: support custom attributes
|
||||
TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
|
||||
FlagsAttribute flagsAtt = (FlagsAttribute) stream.addAttribute(FlagsAttribute.class);
|
||||
TypeAttribute typeAtt = (TypeAttribute) stream.addAttribute(TypeAttribute.class);
|
||||
PayloadAttribute payloadAtt = (PayloadAttribute) stream.addAttribute(PayloadAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) stream.addAttribute(PositionIncrementAttribute.class);
|
||||
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
|
||||
FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
|
||||
TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
|
||||
PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
|
||||
stream.reset();
|
||||
while (stream.incrementToken()) {
|
||||
Token token = new Token();
|
||||
token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
|
||||
token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
|
||||
token.setStartOffset(matcher.start());
|
||||
token.setEndOffset(matcher.end());
|
||||
token.setFlags(flagsAtt.getFlags());
|
||||
|
|
|
@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
/**
|
||||
* Tests CommonGramsQueryFilter
|
||||
|
@ -38,20 +38,20 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
|
|||
WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
|
||||
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
|
||||
|
||||
TermAttribute term = cgf.addAttribute(TermAttribute.class);
|
||||
CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
|
||||
assertTrue(cgf.incrementToken());
|
||||
assertEquals("How", term.term());
|
||||
assertEquals("How", term.toString());
|
||||
assertTrue(cgf.incrementToken());
|
||||
assertEquals("How_the", term.term());
|
||||
assertEquals("How_the", term.toString());
|
||||
assertTrue(cgf.incrementToken());
|
||||
assertEquals("the", term.term());
|
||||
assertEquals("the", term.toString());
|
||||
assertTrue(cgf.incrementToken());
|
||||
assertEquals("the_s", term.term());
|
||||
assertEquals("the_s", term.toString());
|
||||
|
||||
wt.reset(new StringReader(input));
|
||||
cgf.reset();
|
||||
assertTrue(cgf.incrementToken());
|
||||
assertEquals("How", term.term());
|
||||
assertEquals("How", term.toString());
|
||||
}
|
||||
|
||||
public void testQueryReset() throws Exception {
|
||||
|
@ -60,16 +60,16 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
|
|||
CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
|
||||
CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
|
||||
|
||||
TermAttribute term = wt.addAttribute(TermAttribute.class);
|
||||
CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
|
||||
assertTrue(nsf.incrementToken());
|
||||
assertEquals("How_the", term.term());
|
||||
assertEquals("How_the", term.toString());
|
||||
assertTrue(nsf.incrementToken());
|
||||
assertEquals("the_s", term.term());
|
||||
assertEquals("the_s", term.toString());
|
||||
|
||||
wt.reset(new StringReader(input));
|
||||
nsf.reset();
|
||||
assertTrue(nsf.incrementToken());
|
||||
assertEquals("How_the", term.term());
|
||||
assertEquals("How_the", term.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase {
|
||||
|
||||
|
@ -59,12 +59,12 @@ public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase {
|
|||
TokenStream inputStream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international"));
|
||||
|
||||
TokenStream filteredStream = factory.create(inputStream);
|
||||
TermAttribute termAtt = filteredStream.addAttribute(TermAttribute.class);
|
||||
CharTermAttribute termAtt = filteredStream.addAttribute(CharTermAttribute.class);
|
||||
assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass());
|
||||
|
||||
assertTrue(filteredStream.incrementToken());
|
||||
assertEquals(13, termAtt.termLength());
|
||||
assertEquals("international", termAtt.term());
|
||||
assertEquals(13, termAtt.length());
|
||||
assertEquals("international", termAtt.toString());
|
||||
filteredStream.reset();
|
||||
|
||||
// ensure there are no more tokens, such as ANTRNXNL
|
||||
|
|
|
@ -21,7 +21,7 @@ import org.apache.lucene.analysis.Token;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
@ -75,18 +75,18 @@ public class TestBufferedTokenStream extends BaseTokenTestCase {
|
|||
final String input = "How now A B brown A cow B like A B thing?";
|
||||
Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
|
||||
TokenStream ts = new AB_AAB_Stream(tokenizer);
|
||||
TermAttribute term = ts.addAttribute(TermAttribute.class);
|
||||
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
|
||||
assertTrue(ts.incrementToken());
|
||||
assertEquals("How", term.term());
|
||||
assertEquals("How", term.toString());
|
||||
assertTrue(ts.incrementToken());
|
||||
assertEquals("now", term.term());
|
||||
assertEquals("now", term.toString());
|
||||
assertTrue(ts.incrementToken());
|
||||
assertEquals("A", term.term());
|
||||
assertEquals("A", term.toString());
|
||||
// reset back to input,
|
||||
// if reset() does not work correctly then previous buffered tokens will remain
|
||||
tokenizer.reset(new StringReader(input));
|
||||
ts.reset();
|
||||
assertTrue(ts.incrementToken());
|
||||
assertEquals("How", term.term());
|
||||
assertEquals("How", term.toString());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.analysis.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.solr.common.ResourceLoader;
|
||||
|
||||
public class TestCollationKeyFilterFactory extends BaseTokenTestCase {
|
||||
|
@ -177,13 +177,13 @@ public class TestCollationKeyFilterFactory extends BaseTokenTestCase {
|
|||
|
||||
private void assertCollatesToSame(TokenStream stream1, TokenStream stream2)
|
||||
throws IOException {
|
||||
TermAttribute term1 = stream1
|
||||
.addAttribute(TermAttribute.class);
|
||||
TermAttribute term2 = stream2
|
||||
.addAttribute(TermAttribute.class);
|
||||
CharTermAttribute term1 = stream1
|
||||
.addAttribute(CharTermAttribute.class);
|
||||
CharTermAttribute term2 = stream2
|
||||
.addAttribute(CharTermAttribute.class);
|
||||
assertTrue(stream1.incrementToken());
|
||||
assertTrue(stream2.incrementToken());
|
||||
assertEquals(term1.term(), term2.term());
|
||||
assertEquals(term1.toString(), term2.toString());
|
||||
assertFalse(stream1.incrementToken());
|
||||
assertFalse(stream2.incrementToken());
|
||||
}
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.analysis.CharStream;
|
|||
import org.apache.lucene.analysis.MappingCharFilter;
|
||||
import org.apache.lucene.analysis.NormalizeCharMap;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
public class TestPatternTokenizerFactory extends BaseTokenTestCase
|
||||
{
|
||||
|
@ -117,17 +117,17 @@ public class TestPatternTokenizerFactory extends BaseTokenTestCase
|
|||
*/
|
||||
private static String tsToString(TokenStream in) throws IOException {
|
||||
StringBuilder out = new StringBuilder();
|
||||
TermAttribute termAtt = in.addAttribute(TermAttribute.class);
|
||||
CharTermAttribute termAtt = in.addAttribute(CharTermAttribute.class);
|
||||
// extra safety to enforce, that the state is not preserved and also
|
||||
// assign bogus values
|
||||
in.clearAttributes();
|
||||
termAtt.setTermBuffer("bogusTerm");
|
||||
termAtt.setEmpty().append("bogusTerm");
|
||||
while (in.incrementToken()) {
|
||||
if (out.length() > 0)
|
||||
out.append(' ');
|
||||
out.append(termAtt.term());
|
||||
out.append(termAtt.toString());
|
||||
in.clearAttributes();
|
||||
termAtt.setTermBuffer("bogusTerm");
|
||||
termAtt.setEmpty().append("bogusTerm");
|
||||
}
|
||||
|
||||
in.close();
|
||||
|
|
|
@ -21,7 +21,7 @@ import org.apache.lucene.analysis.Token;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Arrays;
|
||||
|
@ -44,14 +44,14 @@ public class TestRemoveDuplicatesTokenFilter extends BaseTokenTestCase {
|
|||
RemoveDuplicatesTokenFilterFactory factory = new RemoveDuplicatesTokenFilterFactory();
|
||||
final TokenStream ts = factory.create
|
||||
(new TokenStream() {
|
||||
TermAttribute termAtt = addAttribute(TermAttribute.class);
|
||||
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
public boolean incrementToken() {
|
||||
if (toks.hasNext()) {
|
||||
clearAttributes();
|
||||
Token tok = toks.next();
|
||||
termAtt.setTermBuffer(tok.term());
|
||||
termAtt.setEmpty().append(tok.term());
|
||||
offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
|
||||
posIncAtt.setPositionIncrement(tok.getPositionIncrement());
|
||||
return true;
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -381,7 +381,7 @@ public class TestSynonymFilter extends BaseTokenTestCase {
|
|||
private static class IterTokenStream extends TokenStream {
|
||||
final Token tokens[];
|
||||
int index = 0;
|
||||
TermAttribute termAtt = addAttribute(TermAttribute.class);
|
||||
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
|
||||
|
@ -403,7 +403,7 @@ public class TestSynonymFilter extends BaseTokenTestCase {
|
|||
else {
|
||||
clearAttributes();
|
||||
Token token = tokens[index++];
|
||||
termAtt.setTermBuffer(token.term());
|
||||
termAtt.setEmpty().append(token.term());
|
||||
offsetAtt.setOffset(token.startOffset(), token.endOffset());
|
||||
posIncAtt.setPositionIncrement(token.getPositionIncrement());
|
||||
flagsAtt.setFlags(token.getFlags());
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
|
||||
/**
|
||||
|
@ -81,7 +81,7 @@ public class TestTrimFilter extends BaseTokenTestCase {
|
|||
private static class IterTokenStream extends TokenStream {
|
||||
final Token tokens[];
|
||||
int index = 0;
|
||||
TermAttribute termAtt = addAttribute(TermAttribute.class);
|
||||
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
|
||||
|
@ -103,7 +103,7 @@ public class TestTrimFilter extends BaseTokenTestCase {
|
|||
else {
|
||||
clearAttributes();
|
||||
Token token = tokens[index++];
|
||||
termAtt.setTermBuffer(token.term());
|
||||
termAtt.setEmpty().append(token.term());
|
||||
offsetAtt.setOffset(token.startOffset(), token.endOffset());
|
||||
posIncAtt.setPositionIncrement(token.getPositionIncrement());
|
||||
flagsAtt.setFlags(token.getFlags());
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
|
|||
import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@ -347,19 +347,17 @@ public class TestWordDelimiterFilter extends SolrTestCaseJ4 {
|
|||
* Set a large position increment gap of 10 if the token is "largegap" or "/"
|
||||
*/
|
||||
private final class LargePosIncTokenFilter extends TokenFilter {
|
||||
private TermAttribute termAtt;
|
||||
private PositionIncrementAttribute posIncAtt;
|
||||
private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
protected LargePosIncTokenFilter(TokenStream input) {
|
||||
super(input);
|
||||
termAtt = addAttribute(TermAttribute.class);
|
||||
posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (input.incrementToken()) {
|
||||
if (termAtt.term().equals("largegap") || termAtt.term().equals("/"))
|
||||
if (termAtt.toString().equals("largegap") || termAtt.toString().equals("/"))
|
||||
posIncAtt.setPositionIncrement(10);
|
||||
return true;
|
||||
} else {
|
||||
|
|
|
@ -19,11 +19,11 @@ package org.apache.solr.spelling;
|
|||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
|
||||
import java.util.Collection;
|
||||
|
@ -43,18 +43,18 @@ class SimpleQueryConverter extends SpellingQueryConverter{
|
|||
WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
|
||||
TokenStream ts = analyzer.tokenStream("", new StringReader(origQuery));
|
||||
// TODO: support custom attributes
|
||||
TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
|
||||
OffsetAttribute offsetAtt = (OffsetAttribute) ts.addAttribute(OffsetAttribute.class);
|
||||
TypeAttribute typeAtt = (TypeAttribute) ts.addAttribute(TypeAttribute.class);
|
||||
FlagsAttribute flagsAtt = (FlagsAttribute) ts.addAttribute(FlagsAttribute.class);
|
||||
PayloadAttribute payloadAtt = (PayloadAttribute) ts.addAttribute(PayloadAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts.addAttribute(PositionIncrementAttribute.class);
|
||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
||||
TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
|
||||
FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
|
||||
PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
try {
|
||||
ts.reset();
|
||||
while (ts.incrementToken()){
|
||||
Token tok = new Token();
|
||||
tok.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
|
||||
tok.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
|
||||
tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
|
||||
tok.setFlags(flagsAtt.getFlags());
|
||||
tok.setPayload(payloadAtt.getPayload());
|
||||
|
|
|
@ -213,17 +213,17 @@
|
|||
|
||||
final Iterator<Token> iter = tokens.iterator();
|
||||
tstream = filtfac.create( new TokenStream() {
|
||||
TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
|
||||
OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute (OffsetAttribute.class);
|
||||
TypeAttribute typeAtt = (TypeAttribute) addAttribute (TypeAttribute.class);
|
||||
FlagsAttribute flagsAtt = (FlagsAttribute) addAttribute (FlagsAttribute.class);
|
||||
PayloadAttribute payloadAtt = (PayloadAttribute) addAttribute (PayloadAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) addAttribute (PositionIncrementAttribute.class);
|
||||
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
OffsetAttribute offsetAtt = addAttribute (OffsetAttribute.class);
|
||||
TypeAttribute typeAtt = addAttribute (TypeAttribute.class);
|
||||
FlagsAttribute flagsAtt = addAttribute (FlagsAttribute.class);
|
||||
PayloadAttribute payloadAtt = addAttribute (PayloadAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = addAttribute (PositionIncrementAttribute.class);
|
||||
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (iter.hasNext()) {
|
||||
Token token = iter.next();
|
||||
termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
|
||||
termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
|
||||
offsetAtt.setOffset(token.startOffset(), token.endOffset());
|
||||
typeAtt.setType(token.type());
|
||||
flagsAtt.setFlags(token.getFlags());
|
||||
|
@ -255,19 +255,19 @@
|
|||
|
||||
static List<Token> getTokens(TokenStream tstream) throws IOException {
|
||||
List<Token> tokens = new ArrayList<Token>();
|
||||
TermAttribute termAtt = (TermAttribute) tstream.addAttribute(TermAttribute.class);
|
||||
OffsetAttribute offsetAtt = (OffsetAttribute) tstream.addAttribute (OffsetAttribute.class);
|
||||
TypeAttribute typeAtt = (TypeAttribute) tstream.addAttribute (TypeAttribute.class);
|
||||
FlagsAttribute flagsAtt = (FlagsAttribute) tstream.addAttribute (FlagsAttribute.class);
|
||||
PayloadAttribute payloadAtt = (PayloadAttribute) tstream.addAttribute (PayloadAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) tstream.addAttribute (PositionIncrementAttribute.class);
|
||||
CharTermAttribute termAtt = tstream.addAttribute(CharTermAttribute.class);
|
||||
OffsetAttribute offsetAtt = tstream.addAttribute (OffsetAttribute.class);
|
||||
TypeAttribute typeAtt = tstream.addAttribute (TypeAttribute.class);
|
||||
FlagsAttribute flagsAtt = tstream.addAttribute (FlagsAttribute.class);
|
||||
PayloadAttribute payloadAtt = tstream.addAttribute (PayloadAttribute.class);
|
||||
PositionIncrementAttribute posIncAtt = tstream.addAttribute (PositionIncrementAttribute.class);
|
||||
|
||||
while (true) {
|
||||
if (!tstream.incrementToken())
|
||||
break;
|
||||
else {
|
||||
Token token = new Token();
|
||||
token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
|
||||
token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
|
||||
token.setType(typeAtt.type());
|
||||
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
|
||||
token.setPayload(payloadAtt.getPayload());
|
||||
|
|
Loading…
Reference in New Issue