LUCENE-3026: SmartChineseAnalyzer's WordTokenFilter threw NullPointerException on sentences longer than 32,767 characters

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1092328 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-04-14 15:15:31 +00:00
parent c28df3e572
commit ecd795c585
3 changed files with 34 additions and 2 deletions

View File

@ -45,7 +45,10 @@ API Changes
======================= Lucene 3.x (not yet released) ======================= ======================= Lucene 3.x (not yet released) =======================
(No changes) Bug fixes
* LUCENE-3026: SmartChineseAnalyzer's WordTokenFilter threw NullPointerException
on sentences longer than 32,767 characters. (wangzhenghang via Robert Muir)
======================= Lucene 3.1.0 ======================= ======================= Lucene 3.1.0 =======================

View File

@ -75,7 +75,7 @@ class SegGraph {
List<SegToken> result = new ArrayList<SegToken>(); List<SegToken> result = new ArrayList<SegToken>();
int s = -1, count = 0, size = tokenListTable.size(); int s = -1, count = 0, size = tokenListTable.size();
List<SegToken> tokenList; List<SegToken> tokenList;
short index = 0; int index = 0;
while (count < size) { while (count < size) {
if (isStartExist(s)) { if (isStartExist(s)) {
tokenList = tokenListTable.get(s); tokenList = tokenListTable.get(s);

View File

@ -17,8 +17,11 @@
package org.apache.lucene.analysis.cn.smart; package org.apache.lucene.analysis.cn.smart;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
@ -166,4 +169,30 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
new int[] { 0, 1, 3, 4, 6, 7 }, new int[] { 0, 1, 3, 4, 6, 7 },
new int[] { 1, 3, 4, 6, 7, 9 }); new int[] { 1, 3, 4, 6, 7, 9 });
} }
/**
 * LUCENE-3026: analyzes a large document built from 5000 repetitions of a
 * short sentence, each ending with the ideographic full stop, and consumes
 * every token. The test passes if the whole stream can be consumed without
 * an exception being thrown.
 */
public void testLargeDocument() throws Exception {
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < 5000; i++) {
    sb.append("我购买了道具和服装。");
  }
  Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
  TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString()));
  try {
    stream.reset();
    while (stream.incrementToken()) {
      // Tokens are not inspected; only complete consumption matters here.
    }
    // Complete the TokenStream consumer workflow after the last token.
    stream.end();
  } finally {
    // Always release the stream, even if analysis fails part-way.
    stream.close();
  }
}
/**
 * LUCENE-3026: analyzes one very long run of text with no sentence-ending
 * punctuation (5000 repetitions of a 9-character phrase, 45,000 characters
 * in total, which is more than 32,767) and consumes every token. The test
 * passes if the whole stream can be consumed without an exception being
 * thrown.
 */
public void testLargeSentence() throws Exception {
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < 5000; i++) {
    sb.append("我购买了道具和服装");
  }
  Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
  TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString()));
  try {
    stream.reset();
    while (stream.incrementToken()) {
      // Tokens are not inspected; only complete consumption matters here.
    }
    // Complete the TokenStream consumer workflow after the last token.
    stream.end();
  } finally {
    // Always release the stream, even if analysis fails part-way.
    stream.close();
  }
}
} }