From 4456273922144d9b856cf885ff7fc2b797d37f02 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 9 Apr 2012 16:47:56 +0000 Subject: [PATCH] LUCENE-3969: fix PatternTokenizer to not consume chars from the input Reader if it throws IAE git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1311318 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/analysis/pattern/PatternTokenizer.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java index 6aca0c5edd8..bc80391c95e 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java @@ -69,12 +69,17 @@ public final class PatternTokenizer extends Tokenizer { super(input); this.pattern = pattern; this.group = group; - fillBuffer(str, input); - matcher = pattern.matcher(str); + + // Use "" instead of str so don't consume chars + // (fillBuffer) from the input on throwing IAE below: + matcher = pattern.matcher(""); + // confusingly group count depends ENTIRELY on the pattern but is only accessible via matcher if (group >= 0 && group > matcher.groupCount()) { throw new IllegalArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups"); } + fillBuffer(str, input); + matcher.reset(str); index = 0; }