ignore silly bugs in sun's regex impl

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1371725 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-08-10 14:46:30 +00:00
parent e2935c224e
commit 813f5344dd
1 changed file with 10 additions and 1 deletion

View File

@@ -956,9 +956,18 @@ public class _TestUtil {
while (true) {
try {
Pattern p = Pattern.compile(_TestUtil.randomRegexpishString(random));
String replacement = null;
// ignore bugs in Sun's regex impl
try {
replacement = p.matcher(nonBmpString).replaceAll("_");
} catch (StringIndexOutOfBoundsException jdkBug) {
System.out.println("WARNING: your jdk is buggy!");
System.out.println("Pattern.compile(\"" + p.pattern() +
"\").matcher(\"AB\\uD840\\uDC00C\").replaceAll(\"_\"); should not throw IndexOutOfBounds!");
}
// Make sure the result of applying the pattern to a string with extended
// unicode characters is a valid utf16 string. See LUCENE-4078 for discussion.
if (UnicodeUtil.validUTF16String(p.matcher(nonBmpString).replaceAll("_"))) {
if (replacement != null && UnicodeUtil.validUTF16String(replacement)) {
return p;
}
} catch (PatternSyntaxException ignored) {