From 537aeb24e0aa524a4d70b9cea789945c624c5f7e Mon Sep 17 00:00:00 2001 From: Michael Busch Date: Sun, 2 Aug 2009 02:10:46 +0000 Subject: [PATCH] LUCENE-1759: Set final offset correctly in contrib TokenStreams. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@799968 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/analysis/cjk/CJKTokenizer.java | 6 ++++++ .../apache/lucene/analysis/cn/ChineseTokenizer.java | 7 ++++++- .../lucene/analysis/ngram/EdgeNGramTokenizer.java | 6 ++++++ .../apache/lucene/analysis/ngram/NGramTokenizer.java | 6 ++++++ .../apache/lucene/index/memory/PatternAnalyzer.java | 11 +++++++++++ 5 files changed, 35 insertions(+), 1 deletion(-) diff --git a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java index 5b7a42287d9..4ab32c9d58c 100644 --- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java +++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java @@ -272,4 +272,10 @@ public final class CJKTokenizer extends Tokenizer { // return an empty string) } } + + public final void end() { + // set final offset + final int finalOffset = offset; + this.offsetAtt.setOffset(finalOffset, finalOffset); + } } diff --git a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java index cc7f7453733..d0054c0971f 100644 --- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java +++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java @@ -139,6 +139,11 @@ public final class ChineseTokenizer extends Tokenizer { break; } } - } + + public final void end() { + // set final offset + final int finalOffset = offset; + this.offsetAtt.setOffset(finalOffset, finalOffset); + } } diff --git a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java index 179ab33208f..70c7cd2e68c 100644 --- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java +++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java @@ -152,6 +152,12 @@ public class EdgeNGramTokenizer extends Tokenizer { return true; } + public final void end() { + // set final offset + final int finalOffset = inLen; + this.offsetAtt.setOffset(finalOffset, finalOffset); + } + /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should * not be overridden. Delegates to the backwards compatibility layer. */ public final Token next(final Token reusableToken) throws java.io.IOException { diff --git a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java index 72f7d8be36f..889eb982daf 100644 --- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java +++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java @@ -97,6 +97,12 @@ public class NGramTokenizer extends Tokenizer { return true; } + public final void end() { + // set final offset + final int finalOffset = inLen; + this.offsetAtt.setOffset(finalOffset, finalOffset); + } + /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should * not be overridden. Delegates to the backwards compatibility layer. */ public final Token next(final Token reusableToken) throws java.io.IOException { diff --git a/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java b/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java index a48cba815ba..52ac4e6ebda 100644 --- a/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java +++ b/contrib/memory/src/java/org/apache/lucene/index/memory/PatternAnalyzer.java @@ -367,6 +367,11 @@ public class PatternAnalyzer extends Analyzer { } } + public final void end() { + // set final offset + final int finalOffset = str.length(); + this.offsetAtt.setOffset(finalOffset, finalOffset); + } } @@ -442,6 +447,12 @@ public class PatternAnalyzer extends Analyzer { return true; } + public final void end() { + // set final offset + final int finalOffset = str.length(); + this.offsetAtt.setOffset(finalOffset, finalOffset); + } + private boolean isTokenChar(char c, boolean isLetter) { return isLetter ? Character.isLetter(c) : !Character.isWhitespace(c); }