mirror of https://github.com/apache/lucene.git
Applied patch for LUCENE-324, correcting token offsets returned by ChineseTokenizer
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@353930 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ebe44ace90
commit
7a3103fac0
|
@ -306,7 +306,10 @@ Bug fixes
|
||||||
(Yonik Seeley, LUCENE-462)
|
(Yonik Seeley, LUCENE-462)
|
||||||
|
|
||||||
18. Fixed inefficient memory usage when loading an index into RAMDirectory.
|
18. Fixed inefficient memory usage when loading an index into RAMDirectory.
|
||||||
(Volodymyr Bychkoviak via Bernhard, LUCENE-475)
|
(Volodymyr Bychkoviak via Bernhard, LUCENE-475)
|
||||||
|
|
||||||
|
19. Corrected term offsets returned by ChineseTokenizer.
|
||||||
|
(Ray Tsang via Erik Hatcher, LUCENE-324)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
|
|
|
@ -117,6 +117,7 @@ public final class ChineseTokenizer extends Tokenizer {
|
||||||
case Character.OTHER_LETTER:
|
case Character.OTHER_LETTER:
|
||||||
if (length>0) {
|
if (length>0) {
|
||||||
bufferIndex--;
|
bufferIndex--;
|
||||||
|
offset--;
|
||||||
return flush();
|
return flush();
|
||||||
}
|
}
|
||||||
push(c);
|
push(c);
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
package org.apache.lucene.analysis.cn;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author rayt
|
||||||
|
*/
|
||||||
|
public class TestChineseTokenizer extends TestCase
|
||||||
|
{
|
||||||
|
public void testOtherLetterOffset() throws IOException
|
||||||
|
{
|
||||||
|
String s = "a天b";
|
||||||
|
ChineseTokenizer tokenizer = new ChineseTokenizer(new StringReader(s));
|
||||||
|
Token token;
|
||||||
|
|
||||||
|
int correctStartOffset = 0;
|
||||||
|
int correctEndOffset = 1;
|
||||||
|
while ((token = tokenizer.next()) != null)
|
||||||
|
{
|
||||||
|
assertEquals(correctStartOffset, token.startOffset());
|
||||||
|
assertEquals(correctEndOffset, token.endOffset());
|
||||||
|
correctStartOffset++;
|
||||||
|
correctEndOffset++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue