LUCENE-3044: ThaiWordFilter uses AttributeSource.copyTo incorrectly

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096334 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-04-24 15:45:45 +00:00
parent 6a85962022
commit 593d7a54ea
3 changed files with 24 additions and 0 deletions

View File

@ -58,6 +58,10 @@ Bug Fixes
* LUCENE-3043: GermanStemmer threw IndexOutOfBoundsException if it encountered
a zero-length token. (Robert Muir)
* LUCENE-3044: ThaiWordFilter didn't reset its cached state correctly. This only
caused a problem if you consumed a tokenstream, then reused it, added different
attributes to it, and consumed it again. (Robert Muir, Uwe Schindler)
New Features
* LUCENE-3016: Add analyzer for Latvian. (Robert Muir)

View File

@ -131,5 +131,8 @@ public final class ThaiWordFilter extends TokenFilter {
public void reset() throws IOException {
  // Delegate to the superclass reset first, then clear this filter's own state.
  super.reset();

  // LUCENE-3044: drop the cached clone and the attribute references taken from
  // it, so nothing stale is carried over when the stream is reused (possibly
  // after different attributes have been added to it).
  clonedOffsetAtt = null;
  clonedTermAtt = null;
  clonedToken = null;

  // No pending tokens remain in the (now discarded) clone.
  hasMoreTokensInClone = false;
}
}

View File

@ -17,7 +17,11 @@ package org.apache.lucene.analysis.th;
* limitations under the License.
*/
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.util.Version;
/**
@ -148,4 +152,17 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
public void testRandomStrings() throws Exception {
  // Run randomly generated input through a ThaiAnalyzer built for the current
  // test version; the iteration count scales with RANDOM_MULTIPLIER.
  final ThaiAnalyzer thaiAnalyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
  checkRandomData(random, thaiAnalyzer, 10000 * RANDOM_MULTIPLIER);
}
// LUCENE-3044: a reused token stream must still produce the right tokens after
// a consumer adds an attribute that the analyzer itself does not use.
public void testAttributeReuse() throws Exception {
  assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);

  final String text = "ภาษาไทย";
  final ThaiAnalyzer thaiAnalyzer = new ThaiAnalyzer(Version.LUCENE_30);

  // First pass: simply consume the stream.
  final TokenStream firstPass = thaiAnalyzer.reusableTokenStream("dummy", new StringReader(text));
  assertTokenStreamContents(firstPass, new String[] { "ภาษา", "ไทย" });

  // Second pass: request the reusable stream again, register FlagsAttribute
  // (which this analyzer does not use), and consume it once more.
  final TokenStream secondPass = thaiAnalyzer.reusableTokenStream("dummy", new StringReader(text));
  secondPass.addAttribute(FlagsAttribute.class);
  assertTokenStreamContents(secondPass, new String[] { "ภาษา", "ไทย" });
}
}