mirror of https://github.com/apache/lucene.git
LUCENE-8676: The Korean tokenizer does not update the last position if the backtrace is caused by a big buffer (1024 chars).
parent fdb6353539
commit e9c02a6f71
@@ -308,6 +308,9 @@ Bug fixes:
   was not propagating final position increments from its child streams correctly.
   (Dan Meehl, Alan Woodward)
 
+* LUCENE-8676: The Korean tokenizer does not update the last position if the backtrace is caused
+  by a big buffer (1024 chars). (Jim Ferenczi)
+
 New Features
 
 * LUCENE-8026: ExitableDirectoryReader may now time out queries that run on
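Note: a minimal way to exercise this code path from user code is to run the Nori analyzer over a text longer than the 1024-character buffer mentioned above, which forces the tokenizer to backtrace before it reaches a natural break. The snippet below is an illustrative sketch only; the field name, the synthetic input, and the assumption that the lucene-analyzers-nori module is on the classpath are not part of this commit:

    import java.io.IOException;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.ko.KoreanAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

    public class LongInputExample {
      public static void main(String[] args) throws IOException {
        // Build an input well past the 1024-char internal buffer so the
        // tokenizer is forced to backtrace mid-stream.
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < 2048; i++) {
          sb.append('한');
        }
        try (Analyzer analyzer = new KoreanAnalyzer();
             TokenStream ts = analyzer.tokenStream("f", sb.toString())) {
          CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
          PositionIncrementAttribute posIncr = ts.addAttribute(PositionIncrementAttribute.class);
          ts.reset();
          int pos = -1;
          while (ts.incrementToken()) {
            // Accumulate position increments to recover each token's position.
            pos += posIncr.getPositionIncrement();
            System.out.println(term + " @ " + pos);
          }
          ts.end();
        }
      }
    }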
@@ -535,7 +535,6 @@ public final class KoreanTokenizer extends Tokenizer {
       }
 
       if (pos > lastBackTracePos && posData.count == 1 && isFrontier) {
-        //  if (pos > lastBackTracePos && posData.count == 1 && isFrontier) {
         // We are at a "frontier", and only one node is
         // alive, so whatever the eventual best path is must
         // come through this node. So we can safely commit
@@ -618,6 +617,7 @@ public final class KoreanTokenizer extends Tokenizer {
         } else {
           // This means the backtrace only produced
           // punctuation tokens, so we must keep parsing.
+          continue;
         }
       }
 
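For context, the comments in the hunks above describe two situations in which the tokenizer commits buffered tokens: reaching a "frontier" (a position where only one lattice node is alive, so whatever the eventual best path is must come through it), and, per the commit title, a backtrace forced by the internal buffer growing past 1024 characters. The sketch below is a hypothetical, heavily simplified illustration of why a forced backtrace must also advance the last-backtrace position; none of the names (LatticeSketch, MAX_BUFFER, extendLattice, and so on) come from KoreanTokenizer:

    // Hypothetical sketch only; not the KoreanTokenizer implementation.
    final class LatticeSketch {
      private static final int MAX_BUFFER = 1024; // mirrors the 1024-char limit in the commit title
      private int lastBackTracePos = 0;

      void parse(int endPos) {
        for (int pos = 0; pos < endPos; pos++) {
          extendLattice(pos);
          if (isFrontier(pos)) {
            // Only one node is alive here, so the best path must pass
            // through it and the prefix can be committed safely.
            backtrace(pos);
          } else if (pos - lastBackTracePos >= MAX_BUFFER) {
            // Forced commit because too much input has been buffered.
            // Per the commit title, this is the path on which the last
            // position also has to be updated.
            backtrace(pos);
          }
        }
      }

      private void backtrace(int pos) {
        emitTokensUpTo(pos);
        lastBackTracePos = pos; // bookkeeping that both commit paths must share
      }

      // Stubs standing in for the real lattice machinery.
      private void extendLattice(int pos) {}
      private boolean isFrontier(int pos) { return false; }
      private void emitTokensUpTo(int pos) {}
    }

In this sketch, if the forced path skipped the lastBackTracePos update, the gap between pos and lastBackTracePos would never shrink and the forced commit would fire again at every subsequent position.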