diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java index 6aca0c5edd8..bc80391c95e 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java @@ -69,12 +69,17 @@ public final class PatternTokenizer extends Tokenizer { super(input); this.pattern = pattern; this.group = group; - fillBuffer(str, input); - matcher = pattern.matcher(str); + + // Use "" instead of str so don't consume chars + // (fillBuffer) from the input on throwing IAE below: + matcher = pattern.matcher(""); + // confusingly group count depends ENTIRELY on the pattern but is only accessible via matcher if (group >= 0 && group > matcher.groupCount()) { throw new IllegalArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups"); } + fillBuffer(str, input); + matcher.reset(str); index = 0; }