LUCENE-2014: SmartChineseAnalyzer position increment bug

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@830871 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2009-10-29 09:22:37 +00:00
parent 5c40eb4715
commit 1b38f9c24d
3 changed files with 21 additions and 1 deletions

View File

@ -56,6 +56,11 @@ Bug fixes
* LUCENE-2003: Highlighter doesn't respect position increments other than 1 with
PhraseQuerys. (Uwe Schindler, Mark Miller)
* LUCENE-2014: SmartChineseAnalyzer did not properly clear attributes
in WordTokenFilter. If enablePositionIncrements is set for StopFilter,
then this could create invalid position increments, causing IndexWriter
to crash. (Robert Muir, Uwe Schindler)
New features
* LUCENE-1924: Added BalancedSegmentMergePolicy to contrib/misc,

View File

@ -78,7 +78,8 @@ public final class WordTokenFilter extends TokenFilter {
return false; // no more sentences, end of stream!
}
}
// WordTokenFilter must clear attributes, as it is creating new tokens.
clearAttributes();
// There are remaining tokens from the current sentence, return the next one.
SegToken nextWord = (SegToken) tokenIter.next();
termAtt.setTermBuffer(nextWord.charArray, 0, nextWord.charArray.length);

View File

@ -80,6 +80,20 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
}
}
/**
 * Verifies the position increment reported for a token that follows a
 * removed stopword, for the case where the stop filter is configured
 * with enablePositionIncrements.
 */
public void testChineseStopWords2() throws Exception {
  // This constructor variant will load the stopwords.
  final Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);

  // The ':' separating the two words is a stopword.
  final String input = "Title:San";

  final String[] expectedTerms = new String[] { "titl", "san" };
  final int[] expectedStarts = new int[] { 0, 6 };
  final int[] expectedEnds = new int[] { 5, 9 };
  // The second token is expected two positions after the first,
  // accounting for the dropped stopword in between.
  final int[] expectedIncrements = new int[] { 1, 2 };

  assertAnalyzesTo(
      analyzer,
      input,
      expectedTerms,
      expectedStarts,
      expectedEnds,
      expectedIncrements);
}
public void testChineseAnalyzer() throws Exception {
Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true);
String sentence = "我购买了道具和服装。";