mirror of https://github.com/apache/lucene.git
LUCENE-7279: don't throw AIOOBE on some valid inputs
parent 69cb606d78
commit 5947264ff1
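The commit message refers to an ArrayIndexOutOfBoundsException thrown by JapaneseTokenizer on some valid inputs. Below is a minimal sketch of exercising the tokenizer through the standard TokenStream lifecycle, which is the path such inputs go through; the input string is only a placeholder, not one of the failing inputs from LUCENE-7279 (those are not shown in this excerpt).

// Minimal sketch: drive JapaneseTokenizer over a piece of text using the
// standard TokenStream lifecycle (setReader/reset/incrementToken/end/close).
// The input below is a placeholder, not a known failing input for LUCENE-7279.
import java.io.StringReader;

import org.apache.lucene.analysis.ja.JapaneseTokenizer;
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class JapaneseTokenizerSketch {
  public static void main(String[] args) throws Exception {
    String text = "関西国際空港";  // placeholder input
    try (JapaneseTokenizer tokenizer = new JapaneseTokenizer(null, true, Mode.SEARCH)) {
      CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
      tokenizer.setReader(new StringReader(text));
      tokenizer.reset();
      while (tokenizer.incrementToken()) {  // valid input should tokenize without AIOOBE
        System.out.println(term.toString());
      }
      tokenizer.end();
    }
  }
}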
@@ -163,9 +163,6 @@ public final class JapaneseTokenizer extends Tokenizer {
   // Allowable cost difference for N-best output:
   private int nBestCost = 0;
 
-  // Index of the last character of unknown word:
-  private int unknownWordEndIndex = -1;
-
   // True once we've hit the EOF from the input reader:
   private boolean end;
 
@@ -279,7 +276,6 @@ public final class JapaneseTokenizer extends Tokenizer {
 
   private void resetState() {
     positions.reset();
-    unknownWordEndIndex = -1;
     pos = 0;
     end = false;
     lastBackTracePos = 0;
@@ -432,7 +428,7 @@ public final class JapaneseTokenizer extends Tokenizer {
       // end of loop), plus bigram cost:
       final int cost = fromPosData.costs[idx] + costs.get(fromPosData.lastRightID[idx], leftID);
       if (VERBOSE) {
-        System.out.println(" fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID);
+        System.out.println(" fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID + ")");
       }
       if (cost < leastCost) {
         leastCost = cost;
@@ -629,6 +625,9 @@ public final class JapaneseTokenizer extends Tokenizer {
       System.out.println("\nPARSE");
     }
 
+    // Index of the last character of unknown word:
+    int unknownWordEndIndex = -1;
+
     // Advances over each position (character):
     while (true) {
 
@@ -752,7 +751,7 @@ public final class JapaneseTokenizer extends Tokenizer {
         }
 
         if (VERBOSE) {
-          System.out.println("\n extend @ pos=" + pos + " char=" + (char) buffer.get(pos));
+          System.out.println("\n extend @ pos=" + pos + " char=" + (char) buffer.get(pos) + " hex=" + Integer.toHexString(buffer.get(pos)));
        }
 
        if (VERBOSE) {
File diff suppressed because one or more lines are too long
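Aside from the two VERBOSE logging tweaks, the structural part of this patch moves unknownWordEndIndex off the tokenizer instance (and out of resetState()) and into a local variable inside the method that runs the parse loop. A stripped-down sketch of that before/after shape follows; the class names are hypothetical, not the real JapaneseTokenizer internals.

// Illustrative only: contrasts per-call state kept in an instance field
// (reset explicitly between uses) with the same state declared as a local,
// as in the patch above. Class names are hypothetical.
class BeforeShape {
  private int unknownWordEndIndex = -1;  // instance field

  void resetState() {
    unknownWordEndIndex = -1;            // must be reset before each parse
  }

  void parse() {
    // ... reads/writes this.unknownWordEndIndex while advancing positions ...
  }
}

class AfterShape {
  void parse() {
    int unknownWordEndIndex = -1;        // scoped to a single parse() call
    // ... same bookkeeping, but the value cannot outlive this invocation ...
  }
}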