TokenStreamToAutomaton failed to handle certain holes correctly

This commit is contained in:
Mike McCandless 2017-01-08 06:26:08 -05:00
parent 1aa9c42512
commit e64111c654
2 changed files with 22 additions and 1 deletions

View File

@@ -113,6 +113,7 @@ public class TokenStreamToAutomaton {
final RollingBuffer<Position> positions = new Positions();
int pos = -1;
int freedPos = 0;
Position posData = null;
int maxOffset = 0;
while (in.incrementToken()) {
@@ -150,7 +151,15 @@ public class TokenStreamToAutomaton {
addHoles(builder, positions, pos);
}
}
positions.freeBefore(pos);
while (freedPos <= pos) {
Position freePosData = positions.get(freedPos);
// don't free this position yet if we may still need to fill holes over it:
if (freePosData.arriving == -1 || freePosData.leaving == -1) {
break;
}
positions.freeBefore(freedPos);
freedPos++;
}
}
final int endPos = pos + posLengthAtt.getPositionLength();

View File

@@ -585,4 +585,16 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
Operations.determinize(Operations.removeDeadStates(expected), DEFAULT_MAX_DETERMINIZED_STATES),
Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_MAX_DETERMINIZED_STATES)));
}
/**
 * Feeds a four-token canned stream ("abc", "xyz", "def", "ghi") to
 * {@code assertSameLanguage}, where "xyz" is the only token whose last
 * {@code token(...)} argument is 8 instead of 1.
 *
 * <p>The expected automaton handed to {@code assertSameLanguage} is the union of two paths:
 * "abc" SEP "xyz", and "abc" SEP HOLE SEP "def" SEP "ghi".
 */
public void testTokenStreamGraphWithHoles() throws Exception {
  // NOTE(review): the token(...) arguments are presumably (term, posInc, posLength);
  // confirm against the helper's declaration elsewhere in this class.
  final Token[] cannedTokens = {
    token("abc", 1, 1),
    token("xyz", 1, 8),
    token("def", 1, 1),
    token("ghi", 1, 1),
  };
  final TokenStream ts = new CannedTokenStream(cannedTokens);

  assertSameLanguage(
      Operations.union(
          // path that takes the long "xyz" token directly after "abc"
          join(s2a("abc"), SEP_A, s2a("xyz")),
          // path that goes through a hole after "abc" before reaching "def" and "ghi"
          join(s2a("abc"), SEP_A, HOLE_A, SEP_A, s2a("def"), SEP_A, s2a("ghi"))),
      ts);
}
}