LUCENE-3969: fix PatternTokenizer to not consume chars from the input Reader if it throws IAE

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1311318 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-04-09 16:47:56 +00:00
parent d76a03214c
commit 4456273922
1 changed files with 7 additions and 2 deletions

View File

@ -69,12 +69,17 @@ public final class PatternTokenizer extends Tokenizer {
super(input);
this.pattern = pattern;
this.group = group;
fillBuffer(str, input);
matcher = pattern.matcher(str);
// Build the matcher over "" rather than str, so that no chars are consumed
// from the input (via fillBuffer) if the IAE below is thrown:
matcher = pattern.matcher("");
// confusingly group count depends ENTIRELY on the pattern but is only accessible via matcher
if (group >= 0 && group > matcher.groupCount()) {
throw new IllegalArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups");
}
fillBuffer(str, input);
matcher.reset(str);
index = 0;
}