mirror of https://github.com/apache/lucene.git
SOLR-1876: convert all Solr tokenstreams to CharTermAttribute, make all non-final TokenStreams/Analyzers final
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@932862 13f79535-47bb-0310-9956-ffa450edef68
commit 9a43d0ee41
parent 3ae65e769c
@@ -59,7 +59,8 @@ Upgrading from Solr 1.4
   "terms" container is a map instead of a named list. This affects
   response formats like JSON, but not XML. (yonik)
 
+* SOLR-1876: All Analyzers and TokenStreams are now final to enforce
+  the decorator pattern. (rmuir, uschindler)
 
 Detailed Change List
 ----------------------
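The same mechanical rewrite recurs throughout the patch. As a quick reference, here is a minimal, hypothetical filter written against the new CharTermAttribute API, with the old TermAttribute equivalents noted in comments; it illustrates the pattern and is not code from this commit.

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
 * Minimal sketch (not part of the patch) of the recurring rewrite:
 *   termAtt.termBuffer()            -> termAtt.buffer()
 *   termAtt.termLength()            -> termAtt.length()
 *   termAtt.setTermBuffer(b, o, n)  -> termAtt.copyBuffer(b, o, n)
 *   termAtt.setTermBuffer(string)   -> termAtt.setEmpty().append(string)
 *   termAtt.resizeTermBuffer(n)     -> termAtt.resizeBuffer(n)
 *   termAtt.setTermLength(n)        -> termAtt.setLength(n)
 *   termAtt.term()                  -> termAtt.toString()
 */
public final class UpperCaseFilterSketch extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public UpperCaseFilterSketch(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) return false;
    final char[] buffer = termAtt.buffer();   // was termAtt.termBuffer()
    final int length = termAtt.length();      // was termAtt.termLength()
    for (int i = 0; i < length; i++) {
      // per-char upper-casing; simplified (ignores supplementary characters)
      buffer[i] = Character.toUpperCase(buffer[i]);
    }
    return true;
  }
}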
@@ -455,6 +455,10 @@
       <formatter classname="${junit.details.formatter}" usefile="false" if="junit.details"/>
       <classpath refid="test.run.classpath"/>
+      <assertions>
+        <enable package="org.apache.lucene"/>
+        <enable package="org.apache.solr"/>
+      </assertions>
       <formatter type="${junit.formatter}"/>
       <batchtest fork="yes" todir="${junit.output.dir}" if="runall">
         <fileset dir="src/test" includes="**/Test@{pattern}*.java,**/@{pattern}*Test.java"/>
@@ -139,6 +139,10 @@
     >
       <formatter type="brief" usefile="false" if="junit.details"/>
       <classpath refid="test.classpath"/>
+      <assertions>
+        <enable package="org.apache.lucene"/>
+        <enable package="org.apache.solr"/>
+      </assertions>
       <formatter type="xml"/>
       <batchtest fork="yes" todir="${junit.output.dir}" unless="testcase">
         <fileset dir="src/test/java" includes="${junit.includes}">
@@ -162,6 +162,10 @@
     >
       <formatter type="brief" usefile="false" if="junit.details"/>
       <classpath refid="test.extras.classpath"/>
+      <assertions>
+        <enable package="org.apache.lucene"/>
+        <enable package="org.apache.solr"/>
+      </assertions>
       <formatter type="xml"/>
       <batchtest fork="yes" todir="${junit.output.dir}" unless="testcase">
         <fileset dir="src/extras/test/java" includes="${junit.includes}"/>
@@ -88,6 +88,10 @@
     >
       <formatter type="brief" usefile="false" if="junit.details"/>
       <classpath refid="test.classpath"/>
+      <assertions>
+        <enable package="org.apache.lucene"/>
+        <enable package="org.apache.solr"/>
+      </assertions>
       <formatter type="xml"/>
       <batchtest fork="yes" todir="${junit.output.dir}" unless="testcase">
         <fileset dir="src/test/java" includes="${junit.includes}"/>
@@ -87,6 +87,10 @@
       <sysproperty key="java.util.logging.config.file" value="${common-solr.dir}/testlogging.properties"/>
       <formatter type="brief" usefile="false"/>
       <classpath refid="test.classpath"/>
+      <assertions>
+        <enable package="org.apache.lucene"/>
+        <enable package="org.apache.solr"/>
+      </assertions>
       <!--<formatter type="xml" usefile="false"/>-->
       <batchtest fork="yes" todir="${junit.output.dir}" unless="testcase">
         <fileset dir="src/test" includes="${junit.includes}"/>
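The five build.xml hunks above all add the same <assertions> block, which makes Ant fork the test JVM with Java assertions enabled for the org.apache.lucene and org.apache.solr packages (the equivalent of passing -ea for those packages). A small, hypothetical example of the kind of internal check this activates during test runs:

// Hypothetical illustration (not from the patch): with the <assertions>
// blocks above, checks like this run during Ant test targets but cost
// nothing in production, where assertions stay disabled by default.
final class PositionChecker {
  static int checkedIncrement(int posInc) {
    assert posInc >= 0 : "position increments must be non-negative: " + posInc;
    return posInc;
  }
}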
@@ -20,11 +20,11 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.AttributeSource; // javadoc @link
@@ -73,7 +73,8 @@ public abstract class BufferedTokenStream extends TokenFilter {
   private final LinkedList<Token> inQueue = new LinkedList<Token>();
   private final LinkedList<Token> outQueue = new LinkedList<Token>();
 
-  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
   private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
@@ -150,7 +150,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
       return null;
     } else {
       Token token = new Token();
-      token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+      token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
       token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
       token.setType(typeAtt.type());
       token.setFlags(flagsAtt.getFlags());
@@ -163,7 +163,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
   /** old api emulation for back compat */
   private boolean writeToken(Token token) throws IOException {
     clearAttributes();
-    termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
+    termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
     offsetAtt.setOffset(token.startOffset(), token.endOffset());
     typeAtt.setType(token.type());
     flagsAtt.setFlags(token.getFlags());
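BufferedTokenStream buffers legacy Token objects and bridges them to the attribute API in both directions. The two hunks above reduce to the following conversion idioms; this helper is an illustrative sketch, not part of the patch:

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

/** Hypothetical helper mirroring what the hunks above do. */
final class TokenBridge {
  /** attribute state -> legacy Token (cf. the @@ -150 hunk) */
  static Token toToken(CharTermAttribute termAtt, OffsetAttribute offsetAtt,
                       TypeAttribute typeAtt, FlagsAttribute flagsAtt) {
    Token token = new Token();
    token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
    token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
    token.setType(typeAtt.type());
    token.setFlags(flagsAtt.getFlags());
    return token;
  }

  /** legacy Token -> attribute state (cf. the @@ -163 hunk) */
  static void fromToken(Token token, CharTermAttribute termAtt, OffsetAttribute offsetAtt,
                        TypeAttribute typeAtt, FlagsAttribute flagsAtt) {
    termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
    offsetAtt.setOffset(token.startOffset(), token.endOffset());
    typeAtt.setType(token.type());
    flagsAtt.setFlags(token.getFlags());
  }
}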
@@ -18,7 +18,7 @@
 package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -188,22 +188,21 @@ public class CapitalizationFilterFactory extends BaseTokenFilterFactory {
  * <p/>
  * This is package protected since it is not useful without the Factory
  */
-class CapitalizationFilter extends TokenFilter {
+final class CapitalizationFilter extends TokenFilter {
   private final CapitalizationFilterFactory factory;
-  private final TermAttribute termAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   public CapitalizationFilter(TokenStream in, final CapitalizationFilterFactory factory) {
     super(in);
     this.factory = factory;
-    this.termAtt = addAttribute(TermAttribute.class);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (!input.incrementToken()) return false;
 
-    char[] termBuffer = termAtt.termBuffer();
-    int termBufferLength = termAtt.termLength();
+    char[] termBuffer = termAtt.buffer();
+    int termBufferLength = termAtt.length();
     char[] backup = null;
     if (factory.maxWordCount < CapitalizationFilterFactory.DEFAULT_MAX_WORD_COUNT) {
       //make a backup in case we exceed the word count
@@ -232,7 +231,7 @@ class CapitalizationFilter extends TokenFilter {
       }
 
       if (wordCount > factory.maxWordCount) {
-        termAtt.setTermBuffer(backup, 0, termBufferLength);
+        termAtt.copyBuffer(backup, 0, termBufferLength);
       }
     }
 
@@ -18,7 +18,7 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.Version;
 
@@ -52,7 +52,7 @@ public final class CommonGramsFilter extends TokenFilter {
 
   private final StringBuilder buffer = new StringBuilder();
 
-  private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
+  private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
   private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
   private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
   private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
@@ -231,7 +231,7 @@ public final class CommonGramsFilter extends TokenFilter {
    * @return {@code true} if the current token is a common term, {@code false} otherwise
    */
   private boolean isCommon() {
-    return commonWords != null && commonWords.contains(termAttribute.termBuffer(), 0, termAttribute.termLength());
+    return commonWords != null && commonWords.contains(termAttribute.buffer(), 0, termAttribute.length());
   }
 
   /**
@@ -239,7 +239,7 @@ public final class CommonGramsFilter extends TokenFilter {
    */
   private void saveTermBuffer() {
     buffer.setLength(0);
-    buffer.append(termAttribute.termBuffer(), 0, termAttribute.termLength());
+    buffer.append(termAttribute.buffer(), 0, termAttribute.length());
     buffer.append(SEPARATOR);
     lastStartOffset = offsetAttribute.startOffset();
     lastWasCommon = isCommon();
@@ -249,19 +249,19 @@ public final class CommonGramsFilter extends TokenFilter {
    * Constructs a compound token.
    */
   private void gramToken() {
-    buffer.append(termAttribute.termBuffer(), 0, termAttribute.termLength());
+    buffer.append(termAttribute.buffer(), 0, termAttribute.length());
     int endOffset = offsetAttribute.endOffset();
 
     clearAttributes();
 
     int length = buffer.length();
-    char termText[] = termAttribute.termBuffer();
+    char termText[] = termAttribute.buffer();
     if (length > termText.length) {
-      termText = termAttribute.resizeTermBuffer(length);
+      termText = termAttribute.resizeBuffer(length);
     }
 
     buffer.getChars(0, length, termText, 0);
-    termAttribute.setTermLength(length);
+    termAttribute.setLength(length);
     posIncAttribute.setPositionIncrement(0);
     offsetAttribute.setOffset(lastStartOffset, endOffset);
     typeAttribute.setType(GRAM_TYPE);
@@ -22,25 +22,23 @@ import java.util.LinkedList;
 import org.apache.commons.codec.language.DoubleMetaphone;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
-public class DoubleMetaphoneFilter extends TokenFilter {
+public final class DoubleMetaphoneFilter extends TokenFilter {
 
   private static final String TOKEN_TYPE = "DoubleMetaphone";
 
   private final LinkedList<State> remainingTokens = new LinkedList<State>();
   private final DoubleMetaphone encoder = new DoubleMetaphone();
   private final boolean inject;
-  private final TermAttribute termAtt;
-  private final PositionIncrementAttribute posAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
 
   protected DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
     super(input);
     this.encoder.setMaxCodeLen(maxCodeLength);
     this.inject = inject;
-    this.termAtt = addAttribute(TermAttribute.class);
-    this.posAtt = addAttribute(PositionIncrementAttribute.class);
   }
 
   @Override
@@ -55,12 +53,12 @@ public class DoubleMetaphoneFilter extends TokenFilter {
 
     if (!input.incrementToken()) return false;
 
-    int len = termAtt.termLength();
+    int len = termAtt.length();
     if (len==0) return true; // pass through zero length terms
 
     int firstAlternativeIncrement = inject ? 0 : posAtt.getPositionIncrement();
 
-    String v = new String(termAtt.termBuffer(), 0, len);
+    String v = termAtt.toString();
     String primaryPhoneticValue = encoder.doubleMetaphone(v);
     String alternatePhoneticValue = encoder.doubleMetaphone(v, true);
 
@@ -74,7 +72,7 @@ public class DoubleMetaphoneFilter extends TokenFilter {
       }
       posAtt.setPositionIncrement( firstAlternativeIncrement );
       firstAlternativeIncrement = 0;
-      termAtt.setTermBuffer(primaryPhoneticValue);
+      termAtt.setEmpty().append(primaryPhoneticValue);
       saveState = true;
     }
 
@@ -86,7 +84,7 @@ public class DoubleMetaphoneFilter extends TokenFilter {
         saveState = false;
       }
       posAtt.setPositionIncrement( firstAlternativeIncrement );
-      termAtt.setTermBuffer(alternatePhoneticValue);
+      termAtt.setEmpty().append(alternatePhoneticValue);
       saveState = true;
     }
 
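The DoubleMetaphoneFilter hunks show the idiom for replacing a term wholesale: CharTermAttribute is an Appendable, so setTermBuffer(String) becomes setEmpty().append(String), and term() becomes toString(). A minimal sketch of the pattern (the reversing transform is invented for illustration):

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/** Minimal sketch, not from the patch; the transform is a placeholder. */
public final class ReplaceTermSketch extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public ReplaceTermSketch(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) return false;
    String value = termAtt.toString();                 // was termAtt.term()
    String replaced = new StringBuilder(value).reverse().toString();
    termAtt.setEmpty().append(replaced);               // was setTermBuffer(replaced)
    return true;
  }
}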
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * When the plain text is extracted from documents, we will often have many words hyphenated and broken into
@@ -54,7 +54,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
  */
 public final class HyphenatedWordsFilter extends TokenFilter {
 
-  private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
+  private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
   private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
 
   private final StringBuilder hyphenated = new StringBuilder();
@@ -75,8 +75,8 @@ public final class HyphenatedWordsFilter extends TokenFilter {
   @Override
   public boolean incrementToken() throws IOException {
     while (input.incrementToken()) {
-      char[] term = termAttribute.termBuffer();
-      int termLength = termAttribute.termLength();
+      char[] term = termAttribute.buffer();
+      int termLength = termAttribute.length();
 
       if (termLength > 0 && term[termLength - 1] == '-') {
         // a hyphenated word
@@ -128,14 +128,14 @@ public final class HyphenatedWordsFilter extends TokenFilter {
     restoreState(savedState);
     savedState = null;
 
-    char term[] = termAttribute.termBuffer();
+    char term[] = termAttribute.buffer();
     int length = hyphenated.length();
-    if (length > termAttribute.termLength()) {
-      term = termAttribute.resizeTermBuffer(length);
+    if (length > termAttribute.length()) {
+      term = termAttribute.resizeBuffer(length);
     }
 
     hyphenated.getChars(0, length, term, 0);
-    termAttribute.setTermLength(length);
+    termAttribute.setLength(length);
     offsetAttribute.setOffset(offsetAttribute.startOffset(), endOffset);
     hyphenated.setLength(0);
   }
@@ -20,7 +20,7 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 import java.io.IOException;
 import java.util.Set;
@@ -34,7 +34,7 @@ import java.util.Set;
  */
 public final class KeepWordFilter extends TokenFilter {
   private final CharArraySet words;
-  private final TermAttribute termAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   /** @deprecated Use {@link #KeepWordFilter(TokenStream, Set, boolean)} instead */
   @Deprecated
@@ -47,13 +47,12 @@ public final class KeepWordFilter extends TokenFilter {
   public KeepWordFilter(TokenStream in, CharArraySet words) {
     super(in);
     this.words = words;
-    this.termAtt = addAttribute(TermAttribute.class);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     while (input.incrementToken()) {
-      if (words.contains(termAtt.termBuffer(), 0, termAtt.termLength())) return true;
+      if (words.contains(termAtt.buffer(), 0, termAtt.length())) return true;
     }
     return false;
   }
@@ -42,7 +42,7 @@ public final class PatternReplaceFilter extends TokenFilter {
   private final Pattern p;
   private final String replacement;
   private final boolean all;
-  private final CharTermAttribute termAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final Matcher m;
 
   /**
@@ -64,7 +64,6 @@ public final class PatternReplaceFilter extends TokenFilter {
     this.p=p;
     this.replacement = (null == replacement) ? "" : replacement;
     this.all=all;
-    this.termAtt = addAttribute(CharTermAttribute.class);
     this.m = p.matcher(termAtt);
   }
 
@@ -22,7 +22,7 @@ import java.io.Reader;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.commons.io.IOUtils;
 
@@ -56,7 +56,7 @@ import org.apache.commons.io.IOUtils;
  */
 public final class PatternTokenizer extends Tokenizer {
 
-  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
   private String str;
@@ -86,7 +86,7 @@ public final class PatternTokenizer extends Tokenizer {
       while (matcher.find()) {
         final String match = matcher.group(group);
         if (match.length() == 0) continue;
-        termAtt.setTermBuffer(match);
+        termAtt.setEmpty().append(match);
         index = matcher.start(group);
         offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.end(group)));
         return true;
@@ -101,7 +101,7 @@ public final class PatternTokenizer extends Tokenizer {
       while (matcher.find()) {
         if (matcher.start() - index > 0) {
           // found a non-zero-length token
-          termAtt.setTermBuffer(str, index, matcher.start() - index);
+          termAtt.setEmpty().append(str, index, matcher.start());
           offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.start()));
           index = matcher.end();
           return true;
@@ -115,7 +115,7 @@ public final class PatternTokenizer extends Tokenizer {
         return false;
       }
 
-      termAtt.setTermBuffer(str, index, str.length() - index);
+      termAtt.setEmpty().append(str, index, str.length());
      offsetAtt.setOffset(correctOffset(index), correctOffset(str.length()));
       index = Integer.MAX_VALUE; // mark exhausted
       return true;
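Note the convention change visible in the PatternTokenizer hunks: the old setTermBuffer(String, offset, length) took a length, while the CharSequence-style append(CharSequence, start, end) takes an exclusive end index, which is why "matcher.start() - index" becomes plain "matcher.start()". A standalone demonstration with assumed values:

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;

/** Small demo of the (start, end) convention; values are invented. */
final class AppendConventionDemo {
  public static void main(String[] args) {
    CharTermAttribute termAtt =
        new AttributeSource().addAttribute(CharTermAttribute.class);
    String str = "hello,world";
    int index = 6;                          // start of the token
    int end = str.length();                 // exclusive end of the token
    termAtt.setEmpty().append(str, index, end);  // end index, not a length
    System.out.println(termAtt.toString());      // prints "world"
  }
}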
@@ -20,7 +20,7 @@ package org.apache.solr.analysis;
 import org.apache.commons.codec.Encoder;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 
 import java.io.IOException;
@@ -31,23 +31,21 @@ import java.io.IOException;
  *
  * @version $Id$
  */
-public class PhoneticFilter extends TokenFilter
+public final class PhoneticFilter extends TokenFilter
 {
   protected boolean inject = true;
   protected Encoder encoder = null;
   protected String name = null;
 
   protected State save = null;
-  private final TermAttribute termAtt;
-  private final PositionIncrementAttribute posAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
 
   public PhoneticFilter(TokenStream in, Encoder encoder, String name, boolean inject) {
     super(in);
     this.encoder = encoder;
     this.name = name;
     this.inject = inject;
-    this.termAtt = addAttribute(TermAttribute.class);
-    this.posAtt = addAttribute(PositionIncrementAttribute.class);
   }
 
   @Override
@@ -62,9 +60,9 @@ public class PhoneticFilter extends TokenFilter
     if (!input.incrementToken()) return false;
 
     // pass through zero-length terms
-    if (termAtt.termLength()==0) return true;
+    if (termAtt.length() == 0) return true;
 
-    String value = termAtt.term();
+    String value = termAtt.toString();
     String phonetic = null;
     try {
      String v = encoder.encode(value).toString();
@@ -75,7 +73,7 @@ public class PhoneticFilter extends TokenFilter
 
     if (!inject) {
       // just modify this token
-      termAtt.setTermBuffer(phonetic);
+      termAtt.setEmpty().append(phonetic);
       return true;
     }
 
@@ -88,7 +86,7 @@ public class PhoneticFilter extends TokenFilter
       save = captureState();
 
       posAtt.setPositionIncrement(origOffset);
-      termAtt.setTermBuffer(phonetic);
+      termAtt.setEmpty().append(phonetic);
       return true;
     }
 
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.Version;
 
 import java.io.IOException;
@@ -31,7 +31,7 @@ import java.io.IOException;
  */
 public final class RemoveDuplicatesTokenFilter extends TokenFilter {
 
-  private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
+  private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
 
   // use a fixed version, as we don't care about case sensitivity.
@@ -52,8 +52,8 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter {
   @Override
   public boolean incrementToken() throws IOException {
     while (input.incrementToken()) {
-      final char term[] = termAttribute.termBuffer();
-      final int length = termAttribute.termLength();
+      final char term[] = termAttribute.buffer();
+      final int length = termAttribute.length();
       final int posIncrement = posIncAttribute.getPositionIncrement();
 
       if (posIncrement > 0) {
@@ -21,7 +21,7 @@ import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * This class produces a special form of reversed tokens, suitable for
@@ -35,17 +35,17 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
  * <code>withOriginal == true</code>, which proportionally increases the size
  * of postings and term dictionary in the index.
  */
-public class ReversedWildcardFilter extends TokenFilter {
+public final class ReversedWildcardFilter extends TokenFilter {
 
   private boolean withOriginal;
   private char markerChar;
   private State save;
-  private TermAttribute termAtt;
+  private CharTermAttribute termAtt;
   private PositionIncrementAttribute posAtt;
 
   protected ReversedWildcardFilter(TokenStream input, boolean withOriginal, char markerChar) {
     super(input);
-    this.termAtt = addAttribute(TermAttribute.class);
+    this.termAtt = addAttribute(CharTermAttribute.class);
     this.posAtt = addAttribute(PositionIncrementAttribute.class);
     this.withOriginal = withOriginal;
     this.markerChar = markerChar;
@@ -63,19 +63,19 @@ public class ReversedWildcardFilter extends TokenFilter {
     if (!input.incrementToken()) return false;
 
     // pass through zero-length terms
-    int oldLen = termAtt.termLength();
+    int oldLen = termAtt.length();
     if (oldLen ==0) return true;
     int origOffset = posAtt.getPositionIncrement();
     if (withOriginal == true){
       posAtt.setPositionIncrement(0);
       save = captureState();
     }
-    char [] buffer = termAtt.resizeTermBuffer(oldLen + 1);
+    char [] buffer = termAtt.resizeBuffer(oldLen + 1);
     buffer[oldLen] = markerChar;
     reverse(buffer, 0, oldLen + 1);
 
     posAtt.setPositionIncrement(origOffset);
-    termAtt.setTermBuffer(buffer, 0, oldLen +1);
+    termAtt.copyBuffer(buffer, 0, oldLen +1);
     return true;
   }
 
@@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.AttributeSource;
 
@@ -41,7 +41,7 @@ import java.util.LinkedList;
  *
  * @version $Id$
  */
-public class SynonymFilter extends TokenFilter {
+public final class SynonymFilter extends TokenFilter {
 
   private final SynonymMap map;  // Map<String, SynonymMap>
   private Iterator<AttributeSource> replacement;  // iterator over generated tokens
@@ -50,7 +50,7 @@ public class SynonymFilter extends TokenFilter {
     super(in);
     this.map = map;
     // just ensuring these attributes exist...
-    addAttribute(TermAttribute.class);
+    addAttribute(CharTermAttribute.class);
     addAttribute(PositionIncrementAttribute.class);
     addAttribute(OffsetAttribute.class);
     addAttribute(TypeAttribute.class);
@@ -87,8 +87,8 @@ public class SynonymFilter extends TokenFilter {
     // common case fast-path of first token not matching anything
     AttributeSource firstTok = nextTok();
     if (firstTok == null) return false;
-    TermAttribute termAtt = firstTok.addAttribute(TermAttribute.class);
-    SynonymMap result = map.submap!=null ? map.submap.get(termAtt.termBuffer(), 0, termAtt.termLength()) : null;
+    CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
+    SynonymMap result = map.submap!=null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;
     if (result == null) {
       copy(this, firstTok);
       return true;
@@ -128,14 +128,14 @@ public class SynonymFilter extends TokenFilter {
       for (int i=0; i<result.synonyms.length; i++) {
         Token repTok = result.synonyms[i];
         AttributeSource newTok = firstTok.cloneAttributes();
-        TermAttribute newTermAtt = newTok.addAttribute(TermAttribute.class);
+        CharTermAttribute newTermAtt = newTok.addAttribute(CharTermAttribute.class);
         OffsetAttribute newOffsetAtt = newTok.addAttribute(OffsetAttribute.class);
         PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(PositionIncrementAttribute.class);
 
         OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class);
 
         newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
-        newTermAtt.setTermBuffer(repTok.termBuffer(), 0, repTok.termLength());
+        newTermAtt.copyBuffer(repTok.termBuffer(), 0, repTok.termLength());
         repPos += repTok.getPositionIncrement();
         if (i==0) repPos=origPos;  // make position of first token equal to original
 
@@ -215,8 +215,8 @@ public class SynonymFilter extends TokenFilter {
       if (tok == this)
         tok = cloneAttributes();
       // check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
-      TermAttribute termAtt = tok.getAttribute(TermAttribute.class);
-      SynonymMap subMap = map.submap.get(termAtt.termBuffer(), 0, termAtt.termLength());
+      CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
+      SynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());
 
       if (subMap != null) {
         // recurse
@@ -18,7 +18,7 @@
 package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.util.plugin.ResourceLoaderAware;
@@ -135,11 +135,10 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
     TokenStream ts = loadTokenizer(tokFactory, reader);
     List<String> tokList = new ArrayList<String>();
     try {
-      TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
       while (ts.incrementToken()){
-        String text = new String(termAtt.termBuffer(), 0, termAtt.termLength());
-        if( text.length() > 0 )
-          tokList.add( text );
+        if( termAtt.length() > 0 )
+          tokList.add( termAtt.toString() );
       }
     } catch (IOException e) {
       throw new RuntimeException(e);
@@ -32,7 +32,7 @@ import java.io.Reader;
 // An analyzer that uses a tokenizer and a list of token filters to
 // create a TokenStream.
 //
-public class TokenizerChain extends SolrAnalyzer {
+public final class TokenizerChain extends SolrAnalyzer {
   final private CharFilterFactory[] charFilters;
   final private TokenizerFactory tokenizer;
   final private TokenFilterFactory[] filters;
@@ -51,7 +51,7 @@ public class TrieTokenizerFactory extends BaseTokenizerFactory {
     }
   }
 
-class TrieTokenizer extends Tokenizer {
+final class TrieTokenizer extends Tokenizer {
   protected static final DateField dateField = new DateField();
   protected final int precisionStep;
   protected final TrieTypes type;
@@ -19,7 +19,7 @@ package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 
 import java.io.IOException;
@@ -32,24 +32,21 @@ import java.io.IOException;
 public final class TrimFilter extends TokenFilter {
 
   final boolean updateOffsets;
-  private final TermAttribute termAtt;
-  private final OffsetAttribute offsetAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
 
   public TrimFilter(TokenStream in, boolean updateOffsets) {
     super(in);
     this.updateOffsets = updateOffsets;
 
-    this.termAtt = addAttribute(TermAttribute.class);
-    this.offsetAtt = addAttribute(OffsetAttribute.class);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (!input.incrementToken()) return false;
 
-    char[] termBuffer = termAtt.termBuffer();
-    int len = termAtt.termLength();
+    char[] termBuffer = termAtt.buffer();
+    int len = termAtt.length();
     //TODO: Is this the right behavior or should we return false?  Currently, " ", returns true, so I think this should
     //also return true
     if (len == 0){
@@ -69,9 +66,9 @@ public final class TrimFilter extends TokenFilter {
     }
     if (start > 0 || end < len) {
       if (start < end) {
-        termAtt.setTermBuffer(termBuffer, start, (end - start));
+        termAtt.copyBuffer(termBuffer, start, (end - start));
       } else {
-        termAtt.setTermLength(0);
+        termAtt.setEmpty();
       }
       if (updateOffsets) {
         int newStart = offsetAtt.startOffset()+start;
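The TrimFilter hunks show in-place shrinking of a token: copyBuffer() to keep a sub-range, setEmpty() to zero it out. A simplified sketch of the whole trim loop under the new API (offset updating and full whitespace handling omitted):

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/** Simplified sketch; the real TrimFilter above also updates offsets. */
public final class TrimSketchFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public TrimSketchFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) return false;
    final char[] buffer = termAtt.buffer();
    final int len = termAtt.length();
    int start = 0;
    int end = len;
    while (start < end && buffer[start] == ' ') start++;   // leading blanks
    while (end > start && buffer[end - 1] == ' ') end--;   // trailing blanks
    if (start > 0 || end < len) {
      if (start < end) {
        termAtt.copyBuffer(buffer, start, end - start);    // keep sub-range
      } else {
        termAtt.setEmpty();                                // all-blank token
      }
    }
    return true;
  }
}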
@@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.RamUsageEstimator;
@@ -120,7 +120,7 @@ final class WordDelimiterFilter extends TokenFilter {
    */
   final CharArraySet protWords;
 
-  private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
+  private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
   private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
   private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
   private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
@@ -338,8 +338,8 @@ final class WordDelimiterFilter extends TokenFilter {
         return false;
       }
 
-      int termLength = termAttribute.termLength();
-      char[] termBuffer = termAttribute.termBuffer();
+      int termLength = termAttribute.length();
+      char[] termBuffer = termAttribute.buffer();
 
       accumPosInc += posIncAttribute.getPositionIncrement();
 
@@ -462,14 +462,14 @@ final class WordDelimiterFilter extends TokenFilter {
     savedStartOffset = offsetAttribute.startOffset();
     savedEndOffset = offsetAttribute.endOffset();
     // if length by start + end offsets doesn't match the term text then assume this is a synonym and don't adjust the offsets.
-    hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.termLength());
+    hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.length());
     savedType = typeAttribute.type();
 
-    if (savedBuffer.length < termAttribute.termLength()) {
-      savedBuffer = new char[ArrayUtil.oversize(termAttribute.termLength(), RamUsageEstimator.NUM_BYTES_CHAR)];
+    if (savedBuffer.length < termAttribute.length()) {
+      savedBuffer = new char[ArrayUtil.oversize(termAttribute.length(), RamUsageEstimator.NUM_BYTES_CHAR)];
     }
 
-    System.arraycopy(termAttribute.termBuffer(), 0, savedBuffer, 0, termAttribute.termLength());
+    System.arraycopy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length());
     iterator.text = savedBuffer;
 
     hasSavedState = true;
@@ -531,7 +531,7 @@ final class WordDelimiterFilter extends TokenFilter {
    */
   private void generatePart(boolean isSingleWord) {
     clearAttributes();
-    termAttribute.setTermBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
+    termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
 
     int startOffSet = (isSingleWord || !hasIllegalOffsets) ? savedStartOffset + iterator.current : savedStartOffset;
     int endOffSet = (hasIllegalOffsets) ? savedEndOffset : savedStartOffset + iterator.end;
@@ -636,13 +636,13 @@ final class WordDelimiterFilter extends TokenFilter {
    */
   void write() {
     clearAttributes();
-    if (termAttribute.termLength() < buffer.length()) {
-      termAttribute.resizeTermBuffer(buffer.length());
+    if (termAttribute.length() < buffer.length()) {
+      termAttribute.resizeBuffer(buffer.length());
     }
-    char termbuffer[] = termAttribute.termBuffer();
+    char termbuffer[] = termAttribute.buffer();
 
     buffer.getChars(0, buffer.length(), termbuffer, 0);
-    termAttribute.setTermLength(buffer.length());
+    termAttribute.setLength(buffer.length());
 
     if (hasIllegalOffsets) {
       offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
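CommonGramsFilter, HyphenatedWordsFilter, and WordDelimiterFilter all use the same grow-and-fill idiom to move a StringBuilder into the term buffer without allocating a String per token. Factored out as an illustrative (hypothetical) helper:

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/** Illustrative helper, not in the patch: write a StringBuilder into the
 *  term's char buffer without a per-token String allocation. */
final class TermBufferUtil {
  static void setTerm(CharTermAttribute termAtt, StringBuilder source) {
    final int length = source.length();
    char[] target = termAtt.buffer();          // was termBuffer()
    if (length > target.length) {
      target = termAtt.resizeBuffer(length);   // was resizeTermBuffer()
    }
    source.getChars(0, length, target, 0);     // bulk copy, no String
    termAtt.setLength(length);                 // was setTermLength()
  }
}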
@@ -18,7 +18,6 @@ package org.apache.solr.handler;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.*;
 import org.apache.lucene.util.BytesRef;
@ -135,10 +134,10 @@ public class AnalysisRequestHandler extends RequestHandlerBase {
|
||||||
// outer is namedList since order of tokens is important
|
// outer is namedList since order of tokens is important
|
||||||
NamedList<NamedList<Object>> tokens = new NamedList<NamedList<Object>>();
|
NamedList<NamedList<Object>> tokens = new NamedList<NamedList<Object>>();
|
||||||
// TODO: support custom attributes
|
// TODO: support custom attributes
|
||||||
TermAttribute termAtt = null;
|
CharTermAttribute termAtt = null;
|
||||||
TermToBytesRefAttribute bytesAtt = null;
|
TermToBytesRefAttribute bytesAtt = null;
|
||||||
if (tstream.hasAttribute(TermAttribute.class)) {
|
if (tstream.hasAttribute(CharTermAttribute.class)) {
|
||||||
termAtt = tstream.getAttribute(TermAttribute.class);
|
termAtt = tstream.getAttribute(CharTermAttribute.class);
|
||||||
} else if (tstream.hasAttribute(TermToBytesRefAttribute.class)) {
|
} else if (tstream.hasAttribute(TermToBytesRefAttribute.class)) {
|
||||||
bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
|
bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
|
||||||
}
|
}
|
||||||
|
@ -151,7 +150,7 @@ public class AnalysisRequestHandler extends RequestHandlerBase {
|
||||||
NamedList<Object> token = new SimpleOrderedMap<Object>();
|
NamedList<Object> token = new SimpleOrderedMap<Object>();
|
||||||
tokens.add("token", token);
|
tokens.add("token", token);
|
||||||
if (termAtt != null) {
|
if (termAtt != null) {
|
||||||
token.add("value", termAtt.term());
|
token.add("value", termAtt.toString());
|
||||||
}
|
}
|
||||||
if (bytesAtt != null) {
|
if (bytesAtt != null) {
|
||||||
bytesAtt.toBytesRef(bytes);
|
bytesAtt.toBytesRef(bytes);
|
||||||
|
|
|
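CharTermAttribute has no term() accessor; because it implements CharSequence, toString() is used where a String is genuinely needed, as in the response writer above. A consuming-loop sketch under that assumption (the helper class and method name are hypothetical):

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

final class TermPrinter {
  // Prints each term the stream produces; toString() materializes the
  // term text and is the replacement for the removed termAtt.term().
  static void printTerms(TokenStream stream) throws IOException {
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(termAtt.toString());
    }
    stream.close();
  }
}

Where the text is only copied onward rather than kept as a String, buffer()/length() avoid the extra allocation, which is the pattern the surrounding hunks prefer.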
@@ -145,10 +145,10 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
     List<Token> tokens = new ArrayList<Token>();

     // TODO change this API to support custom attributes
-    TermAttribute termAtt = null;
+    CharTermAttribute termAtt = null;
     TermToBytesRefAttribute bytesAtt = null;
-    if (tokenStream.hasAttribute(TermAttribute.class)) {
-      termAtt = tokenStream.getAttribute(TermAttribute.class);
+    if (tokenStream.hasAttribute(CharTermAttribute.class)) {
+      termAtt = tokenStream.getAttribute(CharTermAttribute.class);
     } else if (tokenStream.hasAttribute(TermToBytesRefAttribute.class)) {
       bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
     }

@@ -163,7 +163,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
     while (tokenStream.incrementToken()) {
       Token token = new Token();
       if (termAtt != null) {
-        token.setTermBuffer(termAtt.term());
+        token.setTermBuffer(termAtt.toString());
       }
       if (bytesAtt != null) {
         bytesAtt.toBytesRef(bytes);

@@ -259,12 +259,12 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
    * TokenStream that iterates over a list of pre-existing Tokens
    */
   // TODO refactor to support custom attributes
-  protected static class ListBasedTokenStream extends TokenStream {
+  protected final static class ListBasedTokenStream extends TokenStream {
     private final List<Token> tokens;
     private Iterator<Token> tokenIterator;

-    private final TermAttribute termAtt = (TermAttribute)
-        addAttribute(TermAttribute.class);
+    private final CharTermAttribute termAtt = (CharTermAttribute)
+        addAttribute(CharTermAttribute.class);
     private final OffsetAttribute offsetAtt = (OffsetAttribute)
         addAttribute(OffsetAttribute.class);
     private final TypeAttribute typeAtt = (TypeAttribute)

@@ -292,7 +292,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
     public boolean incrementToken() throws IOException {
       if (tokenIterator.hasNext()) {
         Token next = tokenIterator.next();
-        termAtt.setTermBuffer(next.termBuffer(), 0, next.termLength());
+        termAtt.copyBuffer(next.termBuffer(), 0, next.termLength());
         typeAtt.setType(next.type());
         offsetAtt.setOffset(next.startOffset(), next.endOffset());
         flagsAtt.setFlags(next.getFlags());
@@ -39,7 +39,7 @@ import javax.xml.xpath.XPathFactory;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.*;

@@ -298,9 +298,9 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
     TokenStream tokens = analyzer.reusableTokenStream( "", new StringReader( query ) );
     tokens.reset();

-    TermAttribute termAtt = (TermAttribute) tokens.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class);
     while( tokens.incrementToken() ) {
-      norm.append( termAtt.termBuffer(), 0, termAtt.termLength() );
+      norm.append( termAtt.buffer(), 0, termAtt.length() );
     }
     return norm.toString();
   }
@@ -37,7 +37,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.solr.common.SolrException;

@@ -373,16 +373,16 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
     TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
     ts.reset();
     // TODO: support custom attributes
-    TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
-    OffsetAttribute offsetAtt = (OffsetAttribute) ts.addAttribute(OffsetAttribute.class);
-    TypeAttribute typeAtt = (TypeAttribute) ts.addAttribute(TypeAttribute.class);
-    FlagsAttribute flagsAtt = (FlagsAttribute) ts.addAttribute(FlagsAttribute.class);
-    PayloadAttribute payloadAtt = (PayloadAttribute) ts.addAttribute(PayloadAttribute.class);
-    PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts.addAttribute(PositionIncrementAttribute.class);
+    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
+    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
+    FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
+    PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
+    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);

     while (ts.incrementToken()){
       Token token = new Token();
-      token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+      token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
       token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
       token.setType(typeAtt.type());
       token.setFlags(flagsAtt.getFlags());
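The dropped casts in blocks like the one above work because addAttribute is generified: its signature has the shape <A extends Attribute> A addAttribute(Class<A> attClass), so the compiler infers the return type from the class literal. A small before/after sketch (the wrapper class is hypothetical):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

final class AttributeSetupSketch {
  static void setUp(TokenStream ts) {
    // Old style, with a redundant cast:
    //   TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
    // New style: the generic return type makes the cast unnecessary.
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
  }
}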
@@ -512,7 +512,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
  * This is meant to work around fickleness in the highlighter only. It
  * can mess up token positions and should not be used for indexing or querying.
  */
-class TokenOrderingFilter extends TokenFilter {
+final class TokenOrderingFilter extends TokenFilter {
   private final int windowSize;
   private final LinkedList<OrderedToken> queue = new LinkedList<OrderedToken>();
   private boolean done=false;

@@ -586,7 +586,7 @@ class TermOffsetsTokenStream {
     return new MultiValuedStream(length);
   }

-  class MultiValuedStream extends TokenStream {
+  final class MultiValuedStream extends TokenStream {
     private final int length;
     OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
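Marking these concrete streams final is the point of SOLR-1876: new behavior is added by wrapping a TokenStream in another TokenFilter rather than by subclassing a concrete class. A sketch of that decorator idiom; the filter below is hypothetical and only illustrates the composition style:

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

// Wraps any stream (final or not) and doubles each position increment;
// the wrapped stream never needs to be subclassed.
final class GapDoublingFilter extends TokenFilter {
  private final PositionIncrementAttribute posIncAtt =
      addAttribute(PositionIncrementAttribute.class);

  protected GapDoublingFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) return false;
    posIncAtt.setPositionIncrement(2 * posIncAtt.getPositionIncrement());
    return true;
  }
}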
@@ -20,11 +20,9 @@ package org.apache.solr.schema;
 import org.apache.lucene.search.SortField;
 import org.apache.solr.search.function.ValueSource;
 import org.apache.solr.search.function.OrdFieldSource;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Fieldable;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.response.XMLWriter;

@@ -59,7 +57,7 @@ public class BoolField extends FieldType {
   protected final static Analyzer boolAnalyzer = new SolrAnalyzer() {
     public TokenStreamInfo getStream(String fieldName, Reader reader) {
       Tokenizer tokenizer = new Tokenizer(reader) {
-        final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+        final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
         boolean done = false;

         @Override

@@ -75,7 +73,7 @@ public class BoolField extends FieldType {
           done = true;
           int ch = input.read();
           if (ch==-1) return false;
-          termAtt.setTermBuffer(
+          termAtt.copyBuffer(
              ((ch=='t' || ch=='T' || ch=='1') ? TRUE_TOKEN : FALSE_TOKEN)
              ,0,1);
           return true;
@@ -21,7 +21,7 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.Query;

@@ -369,7 +369,7 @@ public abstract class FieldType extends FieldProperties {
    * Default analyzer for types that only produce 1 verbatim token...
    * A maximum size of chars to be read must be specified
    */
-  protected class DefaultAnalyzer extends SolrAnalyzer {
+  protected final class DefaultAnalyzer extends SolrAnalyzer {
     final int maxChars;

     DefaultAnalyzer(int maxChars) {

@@ -379,15 +379,15 @@ public abstract class FieldType extends FieldProperties {
     public TokenStreamInfo getStream(String fieldName, Reader reader) {
       Tokenizer ts = new Tokenizer(reader) {
         final char[] cbuf = new char[maxChars];
-        final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
-        final OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+        final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+        final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
         @Override
         public boolean incrementToken() throws IOException {
           clearAttributes();
           int n = input.read(cbuf,0,maxChars);
           if (n<=0) return false;
           String s = toInternal(new String(cbuf,0,n));
-          termAtt.setTermBuffer(s);
+          termAtt.setEmpty().append(s);
           offsetAtt.setOffset(correctOffset(0),correctOffset(n));
           return true;
         }
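setTermBuffer(String) has no direct CharTermAttribute equivalent; since the attribute implements Appendable and setEmpty() returns the attribute itself, whole-term replacement is written as the chainable setEmpty().append(s). A single-token tokenizer sketch of the idiom, loosely mirroring the DefaultAnalyzer above (the class itself is hypothetical):

import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Emits exactly one token whose text is a fixed value; the whole term is
// replaced with setEmpty().append(...), the successor of setTermBuffer(String).
final class SingleValueTokenizer extends Tokenizer {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final String value;
  private boolean done = false;

  SingleValueTokenizer(Reader input, String value) {
    super(input);
    this.value = value;
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (done) return false;
    done = true;
    clearAttributes();
    termAtt.setEmpty().append(value); // was termAtt.setTermBuffer(value)
    return true;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    done = false;
  }
}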
@@ -27,7 +27,7 @@ import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Analyzer;

@@ -88,7 +88,7 @@ public class TextField extends FieldType {
       source = analyzer.tokenStream(field, new StringReader(queryText));
     }
     CachingTokenFilter buffer = new CachingTokenFilter(source);
-    TermAttribute termAtt = null;
+    CharTermAttribute termAtt = null;
     PositionIncrementAttribute posIncrAtt = null;
     int numTokens = 0;

@@ -100,11 +100,11 @@ public class TextField extends FieldType {
       // success==false if we hit an exception
     }
     if (success) {
-      if (buffer.hasAttribute(TermAttribute.class)) {
-        termAtt = (TermAttribute) buffer.getAttribute(TermAttribute.class);
+      if (buffer.hasAttribute(CharTermAttribute.class)) {
+        termAtt = buffer.getAttribute(CharTermAttribute.class);
       }
       if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
-        posIncrAtt = (PositionIncrementAttribute) buffer.getAttribute(PositionIncrementAttribute.class);
+        posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
       }
     }

@@ -147,7 +147,7 @@ public class TextField extends FieldType {
       try {
         boolean hasNext = buffer.incrementToken();
         assert hasNext == true;
-        term = termAtt.term();
+        term = termAtt.toString();
       } catch (IOException e) {
         // safe to ignore, because we know the number of tokens
       }

@@ -164,7 +164,7 @@ public class TextField extends FieldType {
       try {
         boolean hasNext = buffer.incrementToken();
         assert hasNext == true;
-        term = termAtt.term();
+        term = termAtt.toString();
       } catch (IOException e) {
         // safe to ignore, because we know the number of tokens
       }

@@ -188,7 +188,7 @@ public class TextField extends FieldType {
       try {
         boolean hasNext = buffer.incrementToken();
         assert hasNext == true;
-        term = termAtt.term();
+        term = termAtt.toString();
         if (posIncrAtt != null) {
           positionIncrement = posIncrAtt.getPositionIncrement();
         }

@@ -229,7 +229,7 @@ public class TextField extends FieldType {
       try {
         boolean hasNext = buffer.incrementToken();
         assert hasNext == true;
-        term = termAtt.term();
+        term = termAtt.toString();
         if (posIncrAtt != null) {
           positionIncrement = posIncrAtt.getPositionIncrement();
         }
@@ -1043,7 +1043,7 @@ class ExtendedDismaxQParser extends QParser {
 }


-class ExtendedAnalyzer extends Analyzer {
+final class ExtendedAnalyzer extends Analyzer {
   final Map<String, Analyzer> map = new HashMap<String, Analyzer>();
   final QParser parser;
   final Analyzer queryAnalyzer;
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;


@@ -105,15 +105,15 @@ public class SpellingQueryConverter extends QueryConverter {
     try {
       stream = analyzer.reusableTokenStream("", new StringReader(word));
       // TODO: support custom attributes
-      TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class);
-      FlagsAttribute flagsAtt = (FlagsAttribute) stream.addAttribute(FlagsAttribute.class);
-      TypeAttribute typeAtt = (TypeAttribute) stream.addAttribute(TypeAttribute.class);
-      PayloadAttribute payloadAtt = (PayloadAttribute) stream.addAttribute(PayloadAttribute.class);
-      PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) stream.addAttribute(PositionIncrementAttribute.class);
+      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
+      FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
+      TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
+      PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
+      PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
       stream.reset();
       while (stream.incrementToken()) {
         Token token = new Token();
-        token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+        token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
         token.setStartOffset(matcher.start());
         token.setEndOffset(matcher.end());
         token.setFlags(flagsAtt.getFlags());
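The spell-checking paths above still build legacy Token objects, so the term text is copied out of the attribute with buffer()/length() instead of the removed termBuffer()/termLength() pair. A helper-method sketch of that bridge (the class and method names are hypothetical; setTermBuffer and setOffset are the Token calls used in this commit):

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

final class TokenBridge {
  // Copies the current attribute state into a standalone legacy Token.
  static Token toToken(CharTermAttribute termAtt, OffsetAttribute offsetAtt) {
    Token tok = new Token();
    tok.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
    tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
    return tok;
  }
}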
@@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

 /**
  * Tests CommonGramsQueryFilter

@@ -38,20 +38,20 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
     WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
     CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);

-    TermAttribute term = cgf.addAttribute(TermAttribute.class);
+    CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
     assertTrue(cgf.incrementToken());
-    assertEquals("How", term.term());
+    assertEquals("How", term.toString());
     assertTrue(cgf.incrementToken());
-    assertEquals("How_the", term.term());
+    assertEquals("How_the", term.toString());
     assertTrue(cgf.incrementToken());
-    assertEquals("the", term.term());
+    assertEquals("the", term.toString());
     assertTrue(cgf.incrementToken());
-    assertEquals("the_s", term.term());
+    assertEquals("the_s", term.toString());

     wt.reset(new StringReader(input));
     cgf.reset();
     assertTrue(cgf.incrementToken());
-    assertEquals("How", term.term());
+    assertEquals("How", term.toString());
   }

   public void testQueryReset() throws Exception {

@@ -60,16 +60,16 @@ public class CommonGramsFilterTest extends BaseTokenTestCase {
     CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
     CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);

-    TermAttribute term = wt.addAttribute(TermAttribute.class);
+    CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
     assertTrue(nsf.incrementToken());
-    assertEquals("How_the", term.term());
+    assertEquals("How_the", term.toString());
     assertTrue(nsf.incrementToken());
-    assertEquals("the_s", term.term());
+    assertEquals("the_s", term.toString());

     wt.reset(new StringReader(input));
     nsf.reset();
     assertTrue(nsf.incrementToken());
-    assertEquals("How_the", term.term());
+    assertEquals("How_the", term.toString());
   }

   /**
@@ -22,7 +22,7 @@ import java.util.Map;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

 public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase {

@@ -59,12 +59,12 @@ public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase {
     TokenStream inputStream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international"));

     TokenStream filteredStream = factory.create(inputStream);
-    TermAttribute termAtt = filteredStream.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = filteredStream.addAttribute(CharTermAttribute.class);
     assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass());

     assertTrue(filteredStream.incrementToken());
-    assertEquals(13, termAtt.termLength());
-    assertEquals("international", termAtt.term());
+    assertEquals(13, termAtt.length());
+    assertEquals("international", termAtt.toString());
     filteredStream.reset();

     // ensure there are no more tokens, such as ANTRNXNL
@@ -21,7 +21,7 @@ import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

 import java.io.IOException;
 import java.io.StringReader;

@@ -75,18 +75,18 @@ public class TestBufferedTokenStream extends BaseTokenTestCase {
     final String input = "How now A B brown A cow B like A B thing?";
     Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input));
     TokenStream ts = new AB_AAB_Stream(tokenizer);
-    TermAttribute term = ts.addAttribute(TermAttribute.class);
+    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
     assertTrue(ts.incrementToken());
-    assertEquals("How", term.term());
+    assertEquals("How", term.toString());
     assertTrue(ts.incrementToken());
-    assertEquals("now", term.term());
+    assertEquals("now", term.toString());
     assertTrue(ts.incrementToken());
-    assertEquals("A", term.term());
+    assertEquals("A", term.toString());
     // reset back to input,
     // if reset() does not work correctly then previous buffered tokens will remain
     tokenizer.reset(new StringReader(input));
     ts.reset();
     assertTrue(ts.incrementToken());
-    assertEquals("How", term.term());
+    assertEquals("How", term.toString());
   }
 }
@@ -30,7 +30,7 @@ import java.util.Map;

 import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.solr.common.ResourceLoader;

 public class TestCollationKeyFilterFactory extends BaseTokenTestCase {

@@ -177,13 +177,13 @@ public class TestCollationKeyFilterFactory extends BaseTokenTestCase {

   private void assertCollatesToSame(TokenStream stream1, TokenStream stream2)
       throws IOException {
-    TermAttribute term1 = stream1
-        .addAttribute(TermAttribute.class);
-    TermAttribute term2 = stream2
-        .addAttribute(TermAttribute.class);
+    CharTermAttribute term1 = stream1
+        .addAttribute(CharTermAttribute.class);
+    CharTermAttribute term2 = stream2
+        .addAttribute(CharTermAttribute.class);
     assertTrue(stream1.incrementToken());
     assertTrue(stream2.incrementToken());
-    assertEquals(term1.term(), term2.term());
+    assertEquals(term1.toString(), term2.toString());
     assertFalse(stream1.incrementToken());
     assertFalse(stream2.incrementToken());
   }
@@ -29,7 +29,7 @@ import org.apache.lucene.analysis.CharStream;
 import org.apache.lucene.analysis.MappingCharFilter;
 import org.apache.lucene.analysis.NormalizeCharMap;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

 public class TestPatternTokenizerFactory extends BaseTokenTestCase
 {

@@ -117,17 +117,17 @@ public class TestPatternTokenizerFactory extends BaseTokenTestCase
    */
   private static String tsToString(TokenStream in) throws IOException {
     StringBuilder out = new StringBuilder();
-    TermAttribute termAtt = in.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = in.addAttribute(CharTermAttribute.class);
     // extra safety to enforce, that the state is not preserved and also
     // assign bogus values
     in.clearAttributes();
-    termAtt.setTermBuffer("bogusTerm");
+    termAtt.setEmpty().append("bogusTerm");
     while (in.incrementToken()) {
       if (out.length() > 0)
         out.append(' ');
-      out.append(termAtt.term());
+      out.append(termAtt.toString());
       in.clearAttributes();
-      termAtt.setTermBuffer("bogusTerm");
+      termAtt.setEmpty().append("bogusTerm");
     }

     in.close();
@@ -21,7 +21,7 @@ import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

 import java.util.Iterator;
 import java.util.Arrays;

@@ -44,14 +44,14 @@ public class TestRemoveDuplicatesTokenFilter extends BaseTokenTestCase {
     RemoveDuplicatesTokenFilterFactory factory = new RemoveDuplicatesTokenFilterFactory();
     final TokenStream ts = factory.create
       (new TokenStream() {
-         TermAttribute termAtt = addAttribute(TermAttribute.class);
+         CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
          OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
          PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
          public boolean incrementToken() {
            if (toks.hasNext()) {
              clearAttributes();
              Token tok = toks.next();
-             termAtt.setTermBuffer(tok.term());
+             termAtt.setEmpty().append(tok.term());
              offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
              posIncAtt.setPositionIncrement(tok.getPositionIncrement());
              return true;
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

 import java.io.IOException;

@@ -381,7 +381,7 @@ public class TestSynonymFilter extends BaseTokenTestCase {
   private static class IterTokenStream extends TokenStream {
     final Token tokens[];
     int index = 0;
-    TermAttribute termAtt = addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
     OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
     PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
     FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);

@@ -403,7 +403,7 @@ public class TestSynonymFilter extends BaseTokenTestCase {
     else {
       clearAttributes();
       Token token = tokens[index++];
-      termAtt.setTermBuffer(token.term());
+      termAtt.setEmpty().append(token.term());
       offsetAtt.setOffset(token.startOffset(), token.endOffset());
       posIncAtt.setPositionIncrement(token.getPositionIncrement());
       flagsAtt.setFlags(token.getFlags());
@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

 /**

@@ -81,7 +81,7 @@ public class TestTrimFilter extends BaseTokenTestCase {
   private static class IterTokenStream extends TokenStream {
     final Token tokens[];
     int index = 0;
-    TermAttribute termAtt = addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
     OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
     PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
     FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);

@@ -103,7 +103,7 @@ public class TestTrimFilter extends BaseTokenTestCase {
     else {
       clearAttributes();
       Token token = tokens[index++];
-      termAtt.setTermBuffer(token.term());
+      termAtt.setEmpty().append(token.term());
       offsetAtt.setOffset(token.startOffset(), token.endOffset());
       posIncAtt.setPositionIncrement(token.getPositionIncrement());
       flagsAtt.setFlags(token.getFlags());
@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.solr.SolrTestCaseJ4;
 import org.junit.BeforeClass;
 import org.junit.Test;

@@ -347,19 +347,17 @@ public class TestWordDelimiterFilter extends SolrTestCaseJ4 {
    * Set a large position increment gap of 10 if the token is "largegap" or "/"
    */
   private final class LargePosIncTokenFilter extends TokenFilter {
-    private TermAttribute termAtt;
-    private PositionIncrementAttribute posIncAtt;
+    private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);

     protected LargePosIncTokenFilter(TokenStream input) {
       super(input);
-      termAtt = addAttribute(TermAttribute.class);
-      posIncAtt = addAttribute(PositionIncrementAttribute.class);
     }

     @Override
     public boolean incrementToken() throws IOException {
       if (input.incrementToken()) {
-        if (termAtt.term().equals("largegap") || termAtt.term().equals("/"))
+        if (termAtt.toString().equals("largegap") || termAtt.toString().equals("/"))
           posIncAtt.setPositionIncrement(10);
         return true;
       } else {
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
@ -43,18 +43,18 @@ class SimpleQueryConverter extends SpellingQueryConverter{
|
||||||
WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
|
WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
|
||||||
TokenStream ts = analyzer.tokenStream("", new StringReader(origQuery));
|
TokenStream ts = analyzer.tokenStream("", new StringReader(origQuery));
|
||||||
// TODO: support custom attributes
|
// TODO: support custom attributes
|
||||||
TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
OffsetAttribute offsetAtt = (OffsetAttribute) ts.addAttribute(OffsetAttribute.class);
|
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
||||||
TypeAttribute typeAtt = (TypeAttribute) ts.addAttribute(TypeAttribute.class);
|
TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
|
||||||
FlagsAttribute flagsAtt = (FlagsAttribute) ts.addAttribute(FlagsAttribute.class);
|
FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
|
||||||
PayloadAttribute payloadAtt = (PayloadAttribute) ts.addAttribute(PayloadAttribute.class);
|
PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
|
||||||
PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts.addAttribute(PositionIncrementAttribute.class);
|
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while (ts.incrementToken()){
|
while (ts.incrementToken()){
|
||||||
Token tok = new Token();
|
Token tok = new Token();
|
||||||
tok.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
|
tok.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
|
||||||
tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
|
tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
|
||||||
tok.setFlags(flagsAtt.getFlags());
|
tok.setFlags(flagsAtt.getFlags());
|
||||||
tok.setPayload(payloadAtt.getPayload());
|
tok.setPayload(payloadAtt.getPayload());
|
||||||
|
|
|
@@ -213,17 +213,17 @@

     final Iterator<Token> iter = tokens.iterator();
     tstream = filtfac.create( new TokenStream() {
-      TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
-      OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute (OffsetAttribute.class);
-      TypeAttribute typeAtt = (TypeAttribute) addAttribute (TypeAttribute.class);
-      FlagsAttribute flagsAtt = (FlagsAttribute) addAttribute (FlagsAttribute.class);
-      PayloadAttribute payloadAtt = (PayloadAttribute) addAttribute (PayloadAttribute.class);
-      PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) addAttribute (PositionIncrementAttribute.class);
+      CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+      OffsetAttribute offsetAtt = addAttribute (OffsetAttribute.class);
+      TypeAttribute typeAtt = addAttribute (TypeAttribute.class);
+      FlagsAttribute flagsAtt = addAttribute (FlagsAttribute.class);
+      PayloadAttribute payloadAtt = addAttribute (PayloadAttribute.class);
+      PositionIncrementAttribute posIncAtt = addAttribute (PositionIncrementAttribute.class);

       public boolean incrementToken() throws IOException {
         if (iter.hasNext()) {
           Token token = iter.next();
-          termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
+          termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
           offsetAtt.setOffset(token.startOffset(), token.endOffset());
           typeAtt.setType(token.type());
           flagsAtt.setFlags(token.getFlags());

@@ -255,19 +255,19 @@

   static List<Token> getTokens(TokenStream tstream) throws IOException {
     List<Token> tokens = new ArrayList<Token>();
-    TermAttribute termAtt = (TermAttribute) tstream.addAttribute(TermAttribute.class);
-    OffsetAttribute offsetAtt = (OffsetAttribute) tstream.addAttribute (OffsetAttribute.class);
-    TypeAttribute typeAtt = (TypeAttribute) tstream.addAttribute (TypeAttribute.class);
-    FlagsAttribute flagsAtt = (FlagsAttribute) tstream.addAttribute (FlagsAttribute.class);
-    PayloadAttribute payloadAtt = (PayloadAttribute) tstream.addAttribute (PayloadAttribute.class);
-    PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) tstream.addAttribute (PositionIncrementAttribute.class);
+    CharTermAttribute termAtt = tstream.addAttribute(CharTermAttribute.class);
+    OffsetAttribute offsetAtt = tstream.addAttribute (OffsetAttribute.class);
+    TypeAttribute typeAtt = tstream.addAttribute (TypeAttribute.class);
+    FlagsAttribute flagsAtt = tstream.addAttribute (FlagsAttribute.class);
+    PayloadAttribute payloadAtt = tstream.addAttribute (PayloadAttribute.class);
+    PositionIncrementAttribute posIncAtt = tstream.addAttribute (PositionIncrementAttribute.class);

     while (true) {
       if (!tstream.incrementToken())
         break;
       else {
         Token token = new Token();
-        token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+        token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
         token.setType(typeAtt.type());
         token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
         token.setPayload(payloadAtt.getPayload());