copy all attributes including payload to new tokens

2017-01-09 15:00:21 +01:00 · 2017-01-09 15:00:21 +01:00 · 6570e6ecc2
parent 61e4528306
commit 6570e6ecc2
1 changed file with 4 additions and 12 deletions
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
@@ -22,9 +22,8 @@ import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.AttributeSource;

 /**
 * Tokenizes the given token into n-grams of given size(s).
@@ -43,15 +42,11 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
  private int curTermLength;
  private int curCodePointCount;
  private int curGramSize;
-  private int tokStart;
-  private int tokEnd; // only used if the length changed before this filter
  private int savePosIncr;
-  private int savePosLen;
+  private AttributeSource attributes;
  
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-  private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);

  /**
   * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
@@ -86,17 +81,15 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
          curTermLength = termAtt.length();
          curCodePointCount = Character.codePointCount(termAtt, 0, termAtt.length());
          curGramSize = minGram;
-          tokStart = offsetAtt.startOffset();
-          tokEnd = offsetAtt.endOffset();
+          attributes = input.cloneAttributes();
          savePosIncr += posIncrAtt.getPositionIncrement();
-          savePosLen = posLenAtt.getPositionLength();
        }
      }
      if (curGramSize <= maxGram) {         // if we have hit the end of our n-gram size range, quit
        if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams
          // grab gramSize chars from front or back
          clearAttributes();
-          offsetAtt.setOffset(tokStart, tokEnd);
+          attributes.copyTo(this);
          // first ngram gets increment, others don't
          if (curGramSize == minGram) {
            posIncrAtt.setPositionIncrement(savePosIncr);
@@ -104,7 +97,6 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
          } else {
            posIncrAtt.setPositionIncrement(0);
          }
-          posLenAtt.setPositionLength(savePosLen);
          final int charLength = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curGramSize);
          termAtt.copyBuffer(curTermBuffer, 0, charLength);
          curGramSize++;