From 813f5344ddc33ef307e9263edab97b0c04299ee9 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 10 Aug 2012 14:46:30 +0000 Subject: [PATCH] ignore silly bugs in sun's regex impl git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1371725 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/java/org/apache/lucene/util/_TestUtil.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java index 77915668e87..0a0fd27f3f6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java @@ -956,9 +956,18 @@ public class _TestUtil { while (true) { try { Pattern p = Pattern.compile(_TestUtil.randomRegexpishString(random)); + String replacement = null; + // ignore bugs in Sun's regex impl + try { + replacement = p.matcher(nonBmpString).replaceAll("_"); + } catch (StringIndexOutOfBoundsException jdkBug) { + System.out.println("WARNING: your jdk is buggy!"); + System.out.println("Pattern.compile(\"" + p.pattern() + + "\").matcher(\"AB\\uD840\\uDC00C\").replaceAll(\"_\"); should not throw IndexOutOfBounds!"); + } // Make sure the result of applying the pattern to a string with extended // unicode characters is a valid utf16 string. See LUCENE-4078 for discussion. - if (UnicodeUtil.validUTF16String(p.matcher(nonBmpString).replaceAll("_"))) { + if (replacement != null && UnicodeUtil.validUTF16String(replacement)) { return p; } } catch (PatternSyntaxException ignored) {