LUCENE-7279: don't throw ArrayIndexOutOfBoundsException (AIOOBE) on some valid inputs

This commit is contained in:
Mike McCandless 2016-05-12 05:54:22 -04:00
parent 69cb606d78
commit 5947264ff1
2 changed files with 14 additions and 6 deletions

View File

@@ -163,9 +163,6 @@ public final class JapaneseTokenizer extends Tokenizer {
// Allowable cost difference for N-best output: // Allowable cost difference for N-best output:
private int nBestCost = 0; private int nBestCost = 0;
// Index of the last character of unknown word:
private int unknownWordEndIndex = -1;
// True once we've hit the EOF from the input reader: // True once we've hit the EOF from the input reader:
private boolean end; private boolean end;
@@ -279,7 +276,6 @@ public final class JapaneseTokenizer extends Tokenizer {
private void resetState() { private void resetState() {
positions.reset(); positions.reset();
unknownWordEndIndex = -1;
pos = 0; pos = 0;
end = false; end = false;
lastBackTracePos = 0; lastBackTracePos = 0;
@@ -432,7 +428,7 @@ public final class JapaneseTokenizer extends Tokenizer {
// end of loop), plus bigram cost: // end of loop), plus bigram cost:
final int cost = fromPosData.costs[idx] + costs.get(fromPosData.lastRightID[idx], leftID); final int cost = fromPosData.costs[idx] + costs.get(fromPosData.lastRightID[idx], leftID);
if (VERBOSE) { if (VERBOSE) {
System.out.println(" fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID); System.out.println(" fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID + ")");
} }
if (cost < leastCost) { if (cost < leastCost) {
leastCost = cost; leastCost = cost;
@@ -629,6 +625,9 @@ public final class JapaneseTokenizer extends Tokenizer {
System.out.println("\nPARSE"); System.out.println("\nPARSE");
} }
// Index of the last character of unknown word:
int unknownWordEndIndex = -1;
// Advances over each position (character): // Advances over each position (character):
while (true) { while (true) {
@@ -752,7 +751,7 @@ public final class JapaneseTokenizer extends Tokenizer {
} }
if (VERBOSE) { if (VERBOSE) {
System.out.println("\n extend @ pos=" + pos + " char=" + (char) buffer.get(pos)); System.out.println("\n extend @ pos=" + pos + " char=" + (char) buffer.get(pos) + " hex=" + Integer.toHexString(buffer.get(pos)));
} }
if (VERBOSE) { if (VERBOSE) {