mirror of https://github.com/apache/lucene.git
LUCENE-3044: ThaiWordFilter uses AttributeSource.copyTo incorrectly
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096334 13f79535-47bb-0310-9956-ffa450edef68
parent 6a85962022
commit 593d7a54ea
@@ -57,6 +57,10 @@ Bug Fixes
 * LUCENE-3043: GermanStemmer threw IndexOutOfBoundsException if it encountered
   a zero-length token. (Robert Muir)
 
+* LUCENE-3044: ThaiWordFilter didn't reset its cached state correctly, this only
+  caused a problem if you consumed a tokenstream, then reused it, added different
+  attributes to it, and consumed it again. (Robert Muir, Uwe Schindler)
+
 New Features
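The changelog entry above, together with the commit title's mention of AttributeSource.copyTo, comes down to a contract of copyTo: the target AttributeSource must already provide every attribute the source holds, so copying into a clone that was cached before the consumer added a new attribute fails. A standalone sketch of that constraint (illustrative only, not code from this commit; the class name and layout are made up):

    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
    import org.apache.lucene.util.AttributeSource;

    public class CopyToSketch {
      public static void main(String[] args) {
        // The live stream side: term text plus a FlagsAttribute, as if a consumer
        // had called addAttribute(FlagsAttribute.class) on a reused TokenStream.
        AttributeSource stream = new AttributeSource();
        stream.addAttribute(CharTermAttribute.class).append("ภาษา");
        stream.addAttribute(FlagsAttribute.class);

        // A clone cached earlier, before the extra attribute existed.
        AttributeSource staleClone = new AttributeSource();
        staleClone.addAttribute(CharTermAttribute.class);

        try {
          // copyTo expects the target to contain every attribute of the source;
          // the stale clone has no FlagsAttribute, so this copy fails.
          stream.copyTo(staleClone);
        } catch (RuntimeException e) {
          System.out.println("copyTo rejected the stale clone: " + e);
        }
      }
    }

Nulling the cached clone in reset(), as the next hunk does, forces ThaiWordFilter to rebuild its clone against the stream's current attribute layout instead of copying against a stale one.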
@@ -131,5 +131,8 @@ public final class ThaiWordFilter extends TokenFilter {
   public void reset() throws IOException {
     super.reset();
     hasMoreTokensInClone = false;
+    clonedToken = null;
+    clonedTermAtt = null;
+    clonedOffsetAtt = null;
   }
 }
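The fix itself is the three nulled fields above: ThaiWordFilter keeps a cloned copy of its attributes (clonedToken, clonedTermAtt, clonedOffsetAtt) while it splits one incoming token into several Thai words, and that cache must not survive reset(), because the consumer may change the stream's attribute layout before reusing it. As a rough illustration of the general pattern (a made-up toy filter, not the ThaiWordFilter source), any filter that caches per-stream state for later replay should clear that cache in reset():

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.util.AttributeSource;

    // Toy filter that emits every input token twice by capturing and replaying
    // the stream's attribute state. The relevant part is reset(): the cached
    // state is per-stream and must be dropped there, just as the commit drops
    // clonedToken / clonedTermAtt / clonedOffsetAtt.
    public final class ReplayTwiceFilter extends TokenFilter {
      private AttributeSource.State pending; // state captured from the last token

      public ReplayTwiceFilter(TokenStream input) {
        super(input);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (pending != null) {
          restoreState(pending); // replay the remembered token once more
          pending = null;
          return true;
        }
        if (!input.incrementToken()) {
          return false;
        }
        pending = captureState(); // remember this token for the next call
        return true;
      }

      @Override
      public void reset() throws IOException {
        super.reset();
        pending = null; // never let cached state leak into a reused stream
      }
    }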
@@ -17,7 +17,11 @@ package org.apache.lucene.analysis.th;
  * limitations under the License.
  */
 
+import java.io.StringReader;
+
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
 import org.apache.lucene.util.Version;
 
 /**
@@ -148,4 +152,17 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
   public void testRandomStrings() throws Exception {
     checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
   }
+
+  // LUCENE-3044
+  public void testAttributeReuse() throws Exception {
+    assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
+    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
+    // just consume
+    TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
+    assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
+    // this consumer adds flagsAtt, which this analyzer does not use.
+    ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
+    ts.addAttribute(FlagsAttribute.class);
+    assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
+  }
 }