mirror of https://github.com/apache/lucene.git
TokenStreamToAutomaton failed to handle certain holes correctly
parent 1aa9c42512
commit e64111c654
@@ -113,6 +113,7 @@ public class TokenStreamToAutomaton {
     final RollingBuffer<Position> positions = new Positions();

     int pos = -1;
+    int freedPos = 0;
     Position posData = null;
     int maxOffset = 0;
     while (in.incrementToken()) {
@@ -150,7 +151,15 @@ public class TokenStreamToAutomaton {
             addHoles(builder, positions, pos);
           }
         }
-        positions.freeBefore(pos);
+        while (freedPos <= pos) {
+          Position freePosData = positions.get(freedPos);
+          // don't free this position yet if we may still need to fill holes over it:
+          if (freePosData.arriving == -1 || freePosData.leaving == -1) {
+            break;
+          }
+          positions.freeBefore(freedPos);
+          freedPos++;
+        }
       }

       final int endPos = pos + posLengthAtt.getPositionLength();
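For context: the old code freed every buffered position up to pos as soon as the stream advanced, even when a later token still had to bridge a hole through one of those positions, so addHoles could need position data that had already been recycled. The new freedPos cursor defers freeing a position until both its arriving and leaving states are known. Below is a minimal sketch (not part of the commit) of the kind of token stream that produces such a hole, assuming Lucene's public TokenStreamToAutomaton API plus the CannedTokenStream and Token classes from the test framework; the class name and offsets are illustrative.

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
import org.apache.lucene.util.automaton.Automaton;

public class HoleSketch {
  public static void main(String[] args) throws Exception {
    // "abc" at position 0; "def" skips one position (as if a stopword
    // between them had been removed), leaving a hole at position 1.
    Token abc = new Token("abc", 0, 3);
    Token def = new Token("def", 8, 11);
    def.setPositionIncrement(2);
    TokenStream ts = new CannedTokenStream(abc, def);

    // The resulting automaton represents the skipped position with a HOLE transition.
    Automaton a = new TokenStreamToAutomaton().toAutomaton(ts);
    System.out.println("states: " + a.getNumStates());
  }
}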
@@ -585,4 +585,16 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
         Operations.determinize(Operations.removeDeadStates(expected), DEFAULT_MAX_DETERMINIZED_STATES),
         Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_MAX_DETERMINIZED_STATES)));
   }
+
+  public void testTokenStreamGraphWithHoles() throws Exception {
+    final TokenStream ts = new CannedTokenStream(
+      new Token[] {
+        token("abc", 1, 1),
+        token("xyz", 1, 8),
+        token("def", 1, 1),
+        token("ghi", 1, 1),
+      });
+    assertSameLanguage(Operations.union(join(s2a("abc"), SEP_A, s2a("xyz")),
+                                        join(s2a("abc"), SEP_A, HOLE_A, SEP_A, s2a("def"), SEP_A, s2a("ghi"))), ts);
+  }
 }
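In the new test, "xyz" spans eight positions while nothing arrives at the position "def" starts from, so the expected language is the union of "abc xyz" and "abc <hole> def ghi"; SEP_A and HOLE_A are the test class's single-label automata for the position-separator and hole markers, s2a builds a string automaton, and join concatenates automata. The token(term, posInc, posLength) helper is defined elsewhere in TestGraphTokenizers; a sketch of what it presumably does, with placeholder offsets:

  // Sketch of the existing helper (signature inferred from the calls above):
  // build a Token with the given position increment and position length.
  private static Token token(String term, int posInc, int posLength) {
    final Token t = new Token(term, 0, term.length());
    t.setPositionIncrement(posInc);
    t.setPositionLength(posLength);
    return t;
  }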