From 71291daa74b9e687d897a2890bdc37e0d9156ba9 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 11 Apr 2012 12:16:31 +0000 Subject: [PATCH] LUCENE-3969: when outputting a bigram token, mark posLen=2 to note that it spans two tokens git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1324727 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/analysis/commongrams/CommonGramsFilter.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java index 8232b88c2bf..9798464f938 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java @@ -16,6 +16,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; @@ -54,6 +55,7 @@ public final class CommonGramsFilter extends TokenFilter { private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class); private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class); + private final PositionLengthAttribute posLenAttribute = addAttribute(PositionLengthAttribute.class); private int lastStartOffset; private boolean lastWasCommon; @@ -166,6 +168,7 @@ public final class CommonGramsFilter extends TokenFilter { buffer.getChars(0, length, termText, 0); termAttribute.setLength(length); posIncAttribute.setPositionIncrement(0); + posLenAttribute.setPositionLength(2); // bigram offsetAttribute.setOffset(lastStartOffset, endOffset); typeAttribute.setType(GRAM_TYPE); buffer.setLength(0);