Added UNICODE_CHARACTER_CLASS support to Regex flags. This flag is only supported on Java 7 and later; it is ignored if set on a Java 6 JVM.

Closes #2895
This commit is contained in:
Simon Willnauer 2013-04-16 10:06:53 +02:00
parent eb21526552
commit 30f9f278c3
2 changed files with 80 additions and 8 deletions

View File

@ -22,6 +22,7 @@ package org.elasticsearch.common.regex;
import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
import java.util.Locale;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /**
@ -29,6 +30,13 @@ import java.util.regex.Pattern;
*/ */
public class Regex { public class Regex {
/**
* This Regex / {@link Pattern} flag is supported from Java 7 onwards.
* If set on a Java 6 JVM, the flag is ignored.
*
*/
public static final int UNICODE_CHARACTER_CLASS = 0x100; // supported in JAVA7
/** /**
* Is the str a simple match pattern. * Is the str a simple match pattern.
*/ */
@ -107,22 +115,25 @@ public class Regex {
if (s.isEmpty()) { if (s.isEmpty()) {
continue; continue;
} }
if ("CASE_INSENSITIVE".equalsIgnoreCase(s)) { s = s.toUpperCase(Locale.ROOT);
if ("CASE_INSENSITIVE".equals(s)) {
pFlags |= Pattern.CASE_INSENSITIVE; pFlags |= Pattern.CASE_INSENSITIVE;
} else if ("MULTILINE".equalsIgnoreCase(s)) { } else if ("MULTILINE".equals(s)) {
pFlags |= Pattern.MULTILINE; pFlags |= Pattern.MULTILINE;
} else if ("DOTALL".equalsIgnoreCase(s)) { } else if ("DOTALL".equals(s)) {
pFlags |= Pattern.DOTALL; pFlags |= Pattern.DOTALL;
} else if ("UNICODE_CASE".equalsIgnoreCase(s)) { } else if ("UNICODE_CASE".equals(s)) {
pFlags |= Pattern.UNICODE_CASE; pFlags |= Pattern.UNICODE_CASE;
} else if ("CANON_EQ".equalsIgnoreCase(s)) { } else if ("CANON_EQ".equals(s)) {
pFlags |= Pattern.CANON_EQ; pFlags |= Pattern.CANON_EQ;
} else if ("UNIX_LINES".equalsIgnoreCase(s)) { } else if ("UNIX_LINES".equals(s)) {
pFlags |= Pattern.UNIX_LINES; pFlags |= Pattern.UNIX_LINES;
} else if ("LITERAL".equalsIgnoreCase(s)) { } else if ("LITERAL".equals(s)) {
pFlags |= Pattern.LITERAL; pFlags |= Pattern.LITERAL;
} else if ("COMMENTS".equalsIgnoreCase(s)) { } else if ("COMMENTS".equals(s)) {
pFlags |= Pattern.COMMENTS; pFlags |= Pattern.COMMENTS;
} else if ("UNICODE_CHAR_CLASS".equals(s)) {
pFlags |= UNICODE_CHARACTER_CLASS;
} else { } else {
throw new ElasticSearchIllegalArgumentException("Unknown regex flag [" + s + "]"); throw new ElasticSearchIllegalArgumentException("Unknown regex flag [" + s + "]");
} }
@ -156,6 +167,9 @@ public class Regex {
if ((flags & Pattern.COMMENTS) != 0) { if ((flags & Pattern.COMMENTS) != 0) {
sb.append("COMMENTS|"); sb.append("COMMENTS|");
} }
if ((flags & UNICODE_CHARACTER_CLASS) != 0) {
sb.append("UNICODE_CHAR_CLASS|");
}
return sb.toString(); return sb.toString();
} }
} }

View File

@ -0,0 +1,58 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.unit.common.regex;
import java.util.Random;
import java.util.regex.Pattern;
import org.elasticsearch.common.regex.Regex;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
/**
 * Randomized round-trip test for the flag helpers {@link Regex#flagsFromString(String)}
 * and {@link Regex#flagsToString(int)}.
 */
public class RegexTests {

    /**
     * Builds random combinations of the supported flag names, joined with {@code "|"}, and checks that:
     * <ul>
     * <li>parsing the joined names yields the OR of the corresponding flag bits,</li>
     * <li>rendering those bits with {@code flagsToString} and parsing the result yields the same value,</li>
     * <li>{@link Pattern#compile(String, int)} accepts the combined bits.</li>
     * </ul>
     * The random seed and the generated flag string are included in every assertion message
     * so that a failing run can be reproduced.
     */
    @Test
    public void testFlags() {
        // The two arrays are parallel: supportedFlags[i] is the textual name of flags[i].
        String[] supportedFlags = new String[] { "CASE_INSENSITIVE", "MULTILINE", "DOTALL", "UNICODE_CASE", "CANON_EQ", "UNIX_LINES",
                "LITERAL", "COMMENTS", "UNICODE_CHAR_CLASS" };
        int[] flags = new int[] { Pattern.CASE_INSENSITIVE, Pattern.MULTILINE, Pattern.DOTALL, Pattern.UNICODE_CASE, Pattern.CANON_EQ,
                Pattern.UNIX_LINES, Pattern.LITERAL, Pattern.COMMENTS, Regex.UNICODE_CHARACTER_CLASS };
        long seed = System.currentTimeMillis();
        Random random = new Random(seed);
        // Between 10 and 109 random combinations per run.
        int num = 10 + random.nextInt(100);
        for (int i = 0; i < num; i++) {
            // Zero flags (an empty string) up to flags.length picks; the same flag may be picked more than once.
            int numFlags = random.nextInt(flags.length + 1);
            int current = 0;
            StringBuilder builder = new StringBuilder();
            for (int j = 0; j < numFlags; j++) {
                int index = random.nextInt(flags.length);
                current |= flags[index];
                builder.append(supportedFlags[index]);
                if (j < numFlags - 1) {
                    builder.append("|");
                }
            }
            String flagString = builder.toString();
            // Reported on failure so the exact input can be replayed.
            String reason = "seed [" + seed + "] flags [" + flagString + "]";
            String flagsToString = Regex.flagsToString(current);
            assertThat(reason, Regex.flagsFromString(flagString), equalTo(current));
            assertThat(reason, Regex.flagsFromString(flagString), equalTo(Regex.flagsFromString(flagsToString)));
            Pattern.compile("\\w\\d{1,2}", current); // accepts the flags?
        }
    }
}