diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java index 827e26ffda1..303b7e3201d 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java @@ -22,9 +22,8 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.util.AttributeSource; /** * Tokenizes the given token into n-grams of given size(s). @@ -43,15 +42,11 @@ public final class EdgeNGramTokenFilter extends TokenFilter { private int curTermLength; private int curCodePointCount; private int curGramSize; - private int tokStart; - private int tokEnd; // only used if the length changed before this filter private int savePosIncr; - private int savePosLen; + private AttributeSource attributes; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class); /** * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range @@ -86,17 +81,15 @@ public final class EdgeNGramTokenFilter extends TokenFilter { curTermLength = termAtt.length(); curCodePointCount = Character.codePointCount(termAtt, 0, termAtt.length()); curGramSize = minGram; - tokStart = offsetAtt.startOffset(); - tokEnd = offsetAtt.endOffset(); + attributes = input.cloneAttributes(); savePosIncr += posIncrAtt.getPositionIncrement(); - savePosLen = posLenAtt.getPositionLength(); } } if (curGramSize <= maxGram) { // if we have hit the end of our n-gram size range, quit if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams // grab gramSize chars from front or back clearAttributes(); - offsetAtt.setOffset(tokStart, tokEnd); + attributes.copyTo(this); // first ngram gets increment, others don't if (curGramSize == minGram) { posIncrAtt.setPositionIncrement(savePosIncr); @@ -104,7 +97,6 @@ public final class EdgeNGramTokenFilter extends TokenFilter { } else { posIncrAtt.setPositionIncrement(0); } - posLenAtt.setPositionLength(savePosLen); final int charLength = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curGramSize); termAtt.copyBuffer(curTermBuffer, 0, charLength); curGramSize++;