mirror of https://github.com/apache/lucene.git
LUCENE-3897: if the best-scoring path is ahead of the current pos, move forward to it
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1305149 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c876a1b850
commit
cb1a9a0cdf
|
@ -92,7 +92,7 @@ public final class RollingCharBuffer {
|
||||||
assert pos < nextPos;
|
assert pos < nextPos;
|
||||||
|
|
||||||
// Cannot read from already freed past:
|
// Cannot read from already freed past:
|
||||||
assert nextPos - pos <= count;
|
assert nextPos - pos <= count: "nextPos=" + nextPos + " pos=" + pos + " count=" + count;
|
||||||
|
|
||||||
final int index = getIndex(pos);
|
final int index = getIndex(pos);
|
||||||
return buffer[index];
|
return buffer[index];
|
||||||
|
|
|
@ -683,20 +683,20 @@ public final class KuromojiTokenizer extends Tokenizer {
|
||||||
// Re-base cost so we don't risk int overflow:
|
// Re-base cost so we don't risk int overflow:
|
||||||
Arrays.fill(leastPosData.costs, 0, leastPosData.count, 0);
|
Arrays.fill(leastPosData.costs, 0, leastPosData.count, 0);
|
||||||
|
|
||||||
|
if (pos != leastPosData.pos) {
|
||||||
|
// We jumped into a future position:
|
||||||
|
assert pos < leastPosData.pos;
|
||||||
|
pos = leastPosData.pos;
|
||||||
|
}
|
||||||
|
|
||||||
if (pending.size() != 0) {
|
if (pending.size() != 0) {
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
// This means the backtrace only produced
|
// This means the backtrace only produced
|
||||||
// punctuation tokens, so we must keep parsing.
|
// punctuation tokens, so we must keep parsing.
|
||||||
if (pos != leastPosData.pos) {
|
|
||||||
// We jumped into a future position; continue to
|
|
||||||
// the top of the loop to skip until we get
|
|
||||||
// there:
|
|
||||||
assert pos < leastPosData.pos;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("\n extend @ pos=" + pos + " char=" + (char) buffer.get(pos));
|
System.out.println("\n extend @ pos=" + pos + " char=" + (char) buffer.get(pos));
|
||||||
|
@ -955,11 +955,12 @@ public final class KuromojiTokenizer extends Tokenizer {
|
||||||
// the pending list. The pending list is then in-reverse
|
// the pending list. The pending list is then in-reverse
|
||||||
// (last token should be returned first).
|
// (last token should be returned first).
|
||||||
private void backtrace(final Position endPosData, final int fromIDX) throws IOException {
|
private void backtrace(final Position endPosData, final int fromIDX) throws IOException {
|
||||||
if (VERBOSE) {
|
|
||||||
System.out.println("\n backtrace: pos=" + pos + "; " + (pos - lastBackTracePos) + " characters; last=" + lastBackTracePos + " cost=" + endPosData.costs[fromIDX]);
|
|
||||||
}
|
|
||||||
final int endPos = endPosData.pos;
|
final int endPos = endPosData.pos;
|
||||||
|
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("\n backtrace: endPos=" + endPos + " pos=" + pos + "; " + (pos - lastBackTracePos) + " characters; last=" + lastBackTracePos + " cost=" + endPosData.costs[fromIDX]);
|
||||||
|
}
|
||||||
|
|
||||||
final char[] fragment = buffer.get(lastBackTracePos, endPos-lastBackTracePos);
|
final char[] fragment = buffer.get(lastBackTracePos, endPos-lastBackTracePos);
|
||||||
|
|
||||||
if (dotOut != null) {
|
if (dotOut != null) {
|
||||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue