mirror of https://github.com/apache/lucene.git
LUCENE-8676: Fix the Korean tokenizer failing to update the last position when the backtrace is caused by a big buffer (1024 chars).
This commit is contained in:
parent
fdb6353539
commit
e9c02a6f71
|
@ -308,6 +308,9 @@ Bug fixes:
|
||||||
was not propagating final position increments from its child streams correctly.
|
was not propagating final position increments from its child streams correctly.
|
||||||
(Dan Meehl, Alan Woodward)
|
(Dan Meehl, Alan Woodward)
|
||||||
|
|
||||||
|
* LUCENE-8676: The Korean tokenizer does not update the last position if the backtrace is caused
|
||||||
|
by a big buffer (1024 chars). (Jim Ferenczi)
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
|
||||||
* LUCENE-8026: ExitableDirectoryReader may now time out queries that run on
|
* LUCENE-8026: ExitableDirectoryReader may now time out queries that run on
|
||||||
|
|
|
@ -535,7 +535,6 @@ public final class KoreanTokenizer extends Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pos > lastBackTracePos && posData.count == 1 && isFrontier) {
|
if (pos > lastBackTracePos && posData.count == 1 && isFrontier) {
|
||||||
// if (pos > lastBackTracePos && posData.count == 1 && isFrontier) {
|
|
||||||
// We are at a "frontier", and only one node is
|
// We are at a "frontier", and only one node is
|
||||||
// alive, so whatever the eventual best path is must
|
// alive, so whatever the eventual best path is must
|
||||||
// come through this node. So we can safely commit
|
// come through this node. So we can safely commit
|
||||||
|
@ -618,6 +617,7 @@ public final class KoreanTokenizer extends Tokenizer {
|
||||||
} else {
|
} else {
|
||||||
// This means the backtrace only produced
|
// This means the backtrace only produced
|
||||||
// punctuation tokens, so we must keep parsing.
|
// punctuation tokens, so we must keep parsing.
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue