LUCENE-3897: if best scoring path is ahead of current pos, move forward

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1305149 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-03-25 21:37:55 +00:00
parent c876a1b850
commit cb1a9a0cdf
3 changed files with 32 additions and 11 deletions

View File

@@ -92,7 +92,7 @@ public final class RollingCharBuffer {
assert pos < nextPos;
// Cannot read from already freed past:
assert nextPos - pos <= count;
assert nextPos - pos <= count: "nextPos=" + nextPos + " pos=" + pos + " count=" + count;
final int index = getIndex(pos);
return buffer[index];

View File

@@ -683,20 +683,20 @@ public final class KuromojiTokenizer extends Tokenizer {
// Re-base cost so we don't risk int overflow:
Arrays.fill(leastPosData.costs, 0, leastPosData.count, 0);
if (pos != leastPosData.pos) {
// We jumped into a future position:
assert pos < leastPosData.pos;
pos = leastPosData.pos;
}
if (pending.size() != 0) {
return;
} else {
// This means the backtrace only produced
// punctuation tokens, so we must keep parsing.
if (pos != leastPosData.pos) {
// We jumped into a future position; continue to
// the top of the loop to skip until we get
// there:
assert pos < leastPosData.pos;
continue;
}
}
}
if (VERBOSE) {
System.out.println("\n extend @ pos=" + pos + " char=" + (char) buffer.get(pos));
@@ -955,11 +955,12 @@ public final class KuromojiTokenizer extends Tokenizer {
// the pending list. The pending list is then in-reverse
// (last token should be returned first).
private void backtrace(final Position endPosData, final int fromIDX) throws IOException {
if (VERBOSE) {
System.out.println("\n backtrace: pos=" + pos + "; " + (pos - lastBackTracePos) + " characters; last=" + lastBackTracePos + " cost=" + endPosData.costs[fromIDX]);
}
final int endPos = endPosData.pos;
if (VERBOSE) {
System.out.println("\n backtrace: endPos=" + endPos + " pos=" + pos + "; " + (pos - lastBackTracePos) + " characters; last=" + lastBackTracePos + " cost=" + endPosData.costs[fromIDX]);
}
final char[] fragment = buffer.get(lastBackTracePos, endPos-lastBackTracePos);
if (dotOut != null) {