mirror of https://github.com/apache/lucene.git
LUCENE-3026: SmartChineseAnalyzer's WordTokenFilter threw NullPointerException on sentences longer than 32,767 characters
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1092328 13f79535-47bb-0310-9956-ffa450edef68
parent c28df3e572
commit ecd795c585
CHANGES.txt:

@@ -45,7 +45,10 @@ API Changes
 
 ======================= Lucene 3.x (not yet released) =======================
 
-(No changes)
+Bug fixes
+
+* LUCENE-3026: SmartChineseAnalyzer's WordTokenFilter threw NullPointerException
+  on sentences longer than 32,767 characters. (wangzhenghang via Robert Muir)
 
 ======================= Lucene 3.1.0 =======================
SegGraph.java:

@@ -75,7 +75,7 @@ class SegGraph {
     List<SegToken> result = new ArrayList<SegToken>();
     int s = -1, count = 0, size = tokenListTable.size();
     List<SegToken> tokenList;
-    short index = 0;
+    int index = 0;
     while (count < size) {
       if (isStartExist(s)) {
         tokenList = tokenListTable.get(s);
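The one-line fix above widens the token counter from short to int. Short.MAX_VALUE is 32,767, so a sentence producing more tokens than that wraps the counter to a negative value; the NullPointerException reported against WordTokenFilter is consistent with a later lookup on such a wrapped index. A standalone sketch of the wrap (illustrative only, not part of the patch):

    // Java short arithmetic wraps silently at 32,767, the threshold
    // named in the commit message; int has ample headroom.
    public class ShortWrapDemo {
      public static void main(String[] args) {
        short index = Short.MAX_VALUE;   // 32767
        index++;                         // wraps silently to -32768
        System.out.println(index);       // -32768: a negative token index
        int widened = Short.MAX_VALUE;
        widened++;                       // 32768, as expected
        System.out.println(widened);
      }
    }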
TestSmartChineseAnalyzer.java:

@@ -17,8 +17,11 @@
 
 package org.apache.lucene.analysis.cn.smart;
 
+import java.io.StringReader;
+
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.util.Version;
 
 public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
@@ -166,4 +169,30 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
         new int[] { 0, 1, 3, 4, 6, 7 },
         new int[] { 1, 3, 4, 6, 7, 9 });
   }
+
+  // LUCENE-3026
+  public void testLargeDocument() throws Exception {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < 5000; i++) {
+      sb.append("我购买了道具和服装。");
+    }
+    Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
+    TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString()));
+    stream.reset();
+    while (stream.incrementToken()) {
+    }
+  }
+
+  // LUCENE-3026
+  public void testLargeSentence() throws Exception {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < 5000; i++) {
+      sb.append("我购买了道具和服装");
+    }
+    Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
+    TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString()));
+    stream.reset();
+    while (stream.incrementToken()) {
+    }
+  }
 }
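Both regression tests push the analyzer well past that threshold: the repeated clause 我购买了道具和服装 ("I bought props and costumes") is 9 characters, 10 with the full-width period, so 5,000 repetitions yield 45,000 to 50,000 characters. testLargeDocument keeps the sentence-ending 。, letting the sentence tokenizer split the input into many short sentences, while testLargeSentence drops it, forcing a single sentence longer than Short.MAX_VALUE, the case that previously hit the NPE. A quick arithmetic check (illustrative, not part of the patch):

    // The punctuation-free variant builds one "sentence" of
    // 9 * 5000 = 45,000 characters, well past the short limit.
    public class LengthCheck {
      public static void main(String[] args) {
        String clause = "我购买了道具和服装";         // 9 chars, no sentence break
        System.out.println(clause.length() * 5000);  // 45000
        System.out.println(Short.MAX_VALUE);         // 32767
      }
    }

Note that reusableTokenStream and the Version-taking constructor are the 3.x-era APIs; later Lucene releases replaced reusableTokenStream with tokenStream.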