mirror of https://github.com/apache/lucene.git
Also copy all attributes for n-gram token filters
This commit is contained in:
parent
80e2854247
commit
ea049b96a2
|
@ -23,9 +23,8 @@ import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.miscellaneous.CodepointCountFilter;
|
import org.apache.lucene.analysis.miscellaneous.CodepointCountFilter;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
import org.apache.lucene.util.AttributeSource.State;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tokenizes the input into n-grams of the given size(s).
|
* Tokenizes the input into n-grams of the given size(s).
|
||||||
|
@ -52,14 +51,11 @@ public final class NGramTokenFilter extends TokenFilter {
|
||||||
private int curCodePointCount;
|
private int curCodePointCount;
|
||||||
private int curGramSize;
|
private int curGramSize;
|
||||||
private int curPos;
|
private int curPos;
|
||||||
private int curPosInc, curPosLen;
|
private int curPosInc;
|
||||||
private int tokStart;
|
private State state;
|
||||||
private int tokEnd;
|
|
||||||
|
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||||
private final PositionIncrementAttribute posIncAtt;
|
private final PositionIncrementAttribute posIncAtt;
|
||||||
private final PositionLengthAttribute posLenAtt;
|
|
||||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates NGramTokenFilter with given min and max n-grams.
|
* Creates NGramTokenFilter with given min and max n-grams.
|
||||||
|
@ -79,7 +75,6 @@ public final class NGramTokenFilter extends TokenFilter {
|
||||||
this.maxGram = maxGram;
|
this.maxGram = maxGram;
|
||||||
|
|
||||||
posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
posIncAtt = addAttribute(PositionIncrementAttribute.class);
|
||||||
posLenAtt = addAttribute(PositionLengthAttribute.class);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -104,9 +99,7 @@ public final class NGramTokenFilter extends TokenFilter {
|
||||||
curGramSize = minGram;
|
curGramSize = minGram;
|
||||||
curPos = 0;
|
curPos = 0;
|
||||||
curPosInc = posIncAtt.getPositionIncrement();
|
curPosInc = posIncAtt.getPositionIncrement();
|
||||||
curPosLen = posLenAtt.getPositionLength();
|
state = captureState();
|
||||||
tokStart = offsetAtt.startOffset();
|
|
||||||
tokEnd = offsetAtt.endOffset();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,14 +108,12 @@ public final class NGramTokenFilter extends TokenFilter {
|
||||||
curGramSize = minGram;
|
curGramSize = minGram;
|
||||||
}
|
}
|
||||||
if ((curPos + curGramSize) <= curCodePointCount) {
|
if ((curPos + curGramSize) <= curCodePointCount) {
|
||||||
clearAttributes();
|
restoreState(state);
|
||||||
final int start = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
|
final int start = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
|
||||||
final int end = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
|
final int end = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
|
||||||
termAtt.copyBuffer(curTermBuffer, start, end - start);
|
termAtt.copyBuffer(curTermBuffer, start, end - start);
|
||||||
posIncAtt.setPositionIncrement(curPosInc);
|
posIncAtt.setPositionIncrement(curPosInc);
|
||||||
curPosInc = 0;
|
curPosInc = 0;
|
||||||
posLenAtt.setPositionLength(curPosLen);
|
|
||||||
offsetAtt.setOffset(tokStart, tokEnd);
|
|
||||||
curGramSize++;
|
curGramSize++;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue