mirror of https://github.com/apache/lucene.git
LUCENE-3044: ThaiWordFilter uses AttributeSource.copyTo incorrectly
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096334 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6a85962022
commit
593d7a54ea
|
@ -58,6 +58,10 @@ Bug Fixes
|
|||
* LUCENE-3043: GermanStemmer threw IndexOutOfBoundsException if it encountered
  a zero-length token. (Robert Muir)
|
||||
|
||||
* LUCENE-3044: ThaiWordFilter didn't reset its cached state correctly, this only
  caused a problem if you consumed a tokenstream, then reused it, added different
  attributes to it, and consumed it again. (Robert Muir, Uwe Schindler)
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-3016: Add analyzer for Latvian. (Robert Muir)
|
||||
|
|
|
@ -131,5 +131,8 @@ public final class ThaiWordFilter extends TokenFilter {
|
|||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
hasMoreTokensInClone = false;
|
||||
clonedToken = null;
|
||||
clonedTermAtt = null;
|
||||
clonedOffsetAtt = null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,7 +17,11 @@ package org.apache.lucene.analysis.th;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
|
@ -148,4 +152,17 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
|
|||
public void testRandomStrings() throws Exception {
|
||||
checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
|
||||
// LUCENE-3044
|
||||
public void testAttributeReuse() throws Exception {
|
||||
assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
|
||||
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
|
||||
// just consume
|
||||
TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
|
||||
assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
|
||||
// this consumer adds flagsAtt, which this analyzer does not use.
|
||||
ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
|
||||
ts.addAttribute(FlagsAttribute.class);
|
||||
assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue