mirror of https://github.com/apache/lucene.git
LUCENE-2014: SmartChineseAnalyzer position increment bug
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@830871 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5c40eb4715
commit
1b38f9c24d
|
@ -56,6 +56,11 @@ Bug fixes
|
|||
* LUCENE-2003: Highlighter doesn't respect position increments other than 1 with
|
||||
PhraseQuerys. (Uwe Schindler, Mark Miller)
|
||||
|
||||
* LUCENE-2014: SmartChineseAnalyzer did not properly clear attributes
|
||||
in WordTokenFilter. If enablePositionIncrements is set for StopFilter,
|
||||
then this could create invalid position increments, causing IndexWriter
|
||||
to crash. (Robert Muir, Uwe Schindler)
|
||||
|
||||
New features
|
||||
|
||||
* LUCENE-1924: Added BalancedSegmentMergePolicy to contrib/misc,
|
||||
|
|
|
@ -78,7 +78,8 @@ public final class WordTokenFilter extends TokenFilter {
|
|||
return false; // no more sentences, end of stream!
|
||||
}
|
||||
}
|
||||
|
||||
// WordTokenFilter must clear attributes, as it is creating new tokens.
|
||||
clearAttributes();
|
||||
// There are remaining tokens from the current sentence, return the next one.
|
||||
SegToken nextWord = (SegToken) tokenIter.next();
|
||||
termAtt.setTermBuffer(nextWord.charArray, 0, nextWord.charArray.length);
|
||||
|
|
|
@ -80,6 +80,20 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that position increments after stopwords are correct,
|
||||
* when StopFilter is configured with enablePositionIncrements
|
||||
*/
|
||||
public void testChineseStopWords2() throws Exception {
|
||||
Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */
|
||||
String sentence = "Title:San"; // : is a stopword
|
||||
String result[] = { "titl", "san"};
|
||||
int startOffsets[] = { 0, 6 };
|
||||
int endOffsets[] = { 5, 9 };
|
||||
int posIncr[] = { 1, 2 };
|
||||
assertAnalyzesTo(ca, sentence, result, startOffsets, endOffsets, posIncr);
|
||||
}
|
||||
|
||||
public void testChineseAnalyzer() throws Exception {
|
||||
Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true);
|
||||
String sentence = "我购买了道具和服装。";
|
||||
|
|
Loading…
Reference in New Issue