From 593d7a54eacb79f25e52fdc0e532a4a8ff59c79c Mon Sep 17 00:00:00 2001
From: Robert Muir <rmuir@apache.org>
Date: Sun, 24 Apr 2011 15:45:45 +0000
Subject: [PATCH] LUCENE-3044: ThaiWordFilter uses AttributeSource.copyTo
 incorrectly

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096334 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/contrib/CHANGES.txt                      |  4 ++++
 .../lucene/analysis/th/ThaiWordFilter.java      |  3 +++
 .../lucene/analysis/th/TestThaiAnalyzer.java    | 17 +++++++++++++++++
 3 files changed, 24 insertions(+)

diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt
index 2eda19e3c4b..2fa5c4fb996 100644
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@@ -57,6 +57,10 @@ Bug Fixes
 
  * LUCENE-3043: GermanStemmer threw IndexOutOfBoundsException if it encountered
    a zero-length token.  (Robert Muir)
+   
+ * LUCENE-3044: ThaiWordFilter didn't reset its cached state correctly, this only
+   caused a problem if you consumed a tokenstream, then reused it, added different
+   attributes to it, and consumed it again.  (Robert Muir, Uwe Schindler)
 
 New Features
 
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
index 8f0935c11e7..5f3b7c79988 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
@@ -131,5 +131,8 @@ public final class ThaiWordFilter extends TokenFilter {
   public void reset() throws IOException {
     super.reset();
     hasMoreTokensInClone = false;
+    clonedToken = null;
+    clonedTermAtt = null;
+    clonedOffsetAtt = null;
   }
 }
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
index 2b9e2b05905..6247bbf97be 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
@@ -17,7 +17,11 @@ package org.apache.lucene.analysis.th;
  * limitations under the License.
  */
 
+import java.io.StringReader;
+
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.util.Version;
 
 /**
@@ -148,4 +152,17 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+  
+  // LUCENE-3044
+  public void testAttributeReuse() throws Exception {
+    assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
+    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
+    // just consume
+    TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
+    assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
+    // this consumer adds flagsAtt, which this analyzer does not use. 
+    ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
+    ts.addAttribute(FlagsAttribute.class);
+    assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
+  }
 }