mirror of https://github.com/apache/lucene.git
LUCENE-3044: ThaiWordFilter uses AttributeSource.copyTo incorrectly
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1096334 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6a85962022
commit
593d7a54ea
|
@ -58,6 +58,10 @@ Bug Fixes
|
||||||
* LUCENE-3043: GermanStemmer threw IndexOutOfBoundsException if it encountered
|
* LUCENE-3043: GermanStemmer threw IndexOutOfBoundsException if it encountered
|
||||||
a zero-length token. (Robert Muir)
|
a zero-length token. (Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-3044: ThaiWordFilter didn't reset its cached state correctly; this only
|
||||||
|
caused a problem if you consumed a tokenstream, then reused it, added different
|
||||||
|
attributes to it, and consumed it again. (Robert Muir, Uwe Schindler)
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
|
||||||
* LUCENE-3016: Add analyzer for Latvian. (Robert Muir)
|
* LUCENE-3016: Add analyzer for Latvian. (Robert Muir)
|
||||||
|
|
|
@ -131,5 +131,8 @@ public final class ThaiWordFilter extends TokenFilter {
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
super.reset();
|
super.reset();
|
||||||
hasMoreTokensInClone = false;
|
hasMoreTokensInClone = false;
|
||||||
|
clonedToken = null;
|
||||||
|
clonedTermAtt = null;
|
||||||
|
clonedOffsetAtt = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,11 @@ package org.apache.lucene.analysis.th;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -148,4 +152,17 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
|
||||||
public void testRandomStrings() throws Exception {
|
public void testRandomStrings() throws Exception {
|
||||||
checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LUCENE-3044
|
||||||
|
public void testAttributeReuse() throws Exception {
|
||||||
|
assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
|
||||||
|
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
|
||||||
|
// just consume
|
||||||
|
TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
|
||||||
|
assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
|
||||||
|
// this consumer adds flagsAtt, which this analyzer does not use.
|
||||||
|
ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
|
||||||
|
ts.addAttribute(FlagsAttribute.class);
|
||||||
|
assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue