mirror of https://github.com/apache/lucene.git
LUCENE-3044: ThaiWordFilter uses AttributeSource.copyTo incorrectly
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096334 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6a85962022
commit
593d7a54ea
|
@ -57,6 +57,10 @@ Bug Fixes
|
|||
|
||||
* LUCENE-3043: GermanStemmer threw IndexOutOfBoundsException if it encountered
|
||||
a zero-length token. (Robert Muir)
|
||||
|
||||
* LUCENE-3044: ThaiWordFilter didn't reset its cached state correctly; this only
|
||||
caused a problem if you consumed a tokenstream, then reused it, added different
|
||||
attributes to it, and consumed it again. (Robert Muir, Uwe Schindler)
|
||||
|
||||
New Features
|
||||
|
||||
|
|
|
@ -131,5 +131,8 @@ public final class ThaiWordFilter extends TokenFilter {
|
|||
/**
 * Resets this filter: delegates to {@code super.reset()} and then clears the
 * filter's own cached clone state, so that nothing left over from a previous
 * use of the stream is carried into the next one (LUCENE-3044).
 *
 * @throws IOException declared by the signature; presumably propagated from
 *         {@code super.reset()} (not visible here)
 */
public void reset() throws IOException {
|
||||
super.reset();
|
||||
// no pending tokens remain in the clone
hasMoreTokensInClone = false;
|
||||
// drop the cached clone and the attribute references held alongside it
clonedToken = null;
|
||||
clonedTermAtt = null;
|
||||
clonedOffsetAtt = null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,7 +17,11 @@ package org.apache.lucene.analysis.th;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
|
@ -148,4 +152,17 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
|
|||
/**
 * Runs {@code checkRandomData} against a {@link ThaiAnalyzer} built with
 * {@code TEST_VERSION_CURRENT}.
 */
public void testRandomStrings() throws Exception {
|
||||
// NOTE(review): third argument is presumably the iteration count, scaled by RANDOM_MULTIPLIER - confirm
checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||
}
|
||||
|
||||
// LUCENE-3044
|
||||
/**
 * Regression test for LUCENE-3044: obtains a reusable token stream from a
 * {@link ThaiAnalyzer} and consumes it, then obtains it a second time for the
 * same text, adds a {@link FlagsAttribute} to it, and checks that the same two
 * tokens are produced again.
 * <p>
 * Guarded by {@code assumeTrue} on {@code ThaiWordFilter.DBBI_AVAILABLE}.
 */
public void testAttributeReuse() throws Exception {
|
||||
assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
|
||||
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
|
||||
// first pass: just consume the stream and check its tokens
|
||||
TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
|
||||
assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
|
||||
// second pass: this consumer adds a FlagsAttribute, which this analyzer does not use.
|
||||
ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
|
||||
ts.addAttribute(FlagsAttribute.class);
|
||||
assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue