From 593d7a54eacb79f25e52fdc0e532a4a8ff59c79c Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 24 Apr 2011 15:45:45 +0000 Subject: [PATCH] LUCENE-3044: ThaiWordFilter uses AttributeSource.copyTo incorrectly git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096334 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/contrib/CHANGES.txt | 4 ++++ .../lucene/analysis/th/ThaiWordFilter.java | 3 +++ .../lucene/analysis/th/TestThaiAnalyzer.java | 17 +++++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index 2eda19e3c4b..2fa5c4fb996 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -57,6 +57,10 @@ Bug Fixes * LUCENE-3043: GermanStemmer threw IndexOutOfBoundsException if it encountered a zero-length token. (Robert Muir) + + * LUCENE-3044: ThaiWordFilter didn't reset its cached state correctly; this only + caused a problem if you consumed a tokenstream, then reused it, added different + attributes to it, and consumed it again. 
(Robert Muir, Uwe Schindler) New Features diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java index 8f0935c11e7..5f3b7c79988 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java @@ -131,5 +131,8 @@ public final class ThaiWordFilter extends TokenFilter { public void reset() throws IOException { super.reset(); hasMoreTokensInClone = false; + clonedToken = null; + clonedTermAtt = null; + clonedOffsetAtt = null; } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java index 2b9e2b05905..6247bbf97be 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java @@ -17,7 +17,11 @@ package org.apache.lucene.analysis.th; * limitations under the License. 
*/ +import java.io.StringReader; + import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.util.Version; /** @@ -148,4 +152,17 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase { public void testRandomStrings() throws Exception { checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); } + + // LUCENE-3044 + public void testAttributeReuse() throws Exception { + assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE); + ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30); + // just consume + TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย")); + assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" }); + // this consumer adds flagsAtt, which this analyzer does not use. + ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย")); + ts.addAttribute(FlagsAttribute.class); + assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" }); + } }