Added UNICODE_CHARACTER_CLASS support to Regex flags. This flag is only supported from Java 7 on and is ignored if set on a Java 6 JVM.
Closes #2895
This commit is contained in:
parent
eb21526552
commit
30f9f278c3
|
@ -22,12 +22,20 @@ package org.elasticsearch.common.regex;
|
|||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.Strings;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class Regex {
|
||||
|
||||
/**
|
||||
* This Regex / {@link Pattern} flag is supported from Java 7 on.
|
||||
* If set on a Java 6 JVM, the flag will be ignored.
|
||||
*
|
||||
*/
|
||||
public static final int UNICODE_CHARACTER_CLASS = 0x100; // supported in JAVA7
|
||||
|
||||
/**
|
||||
* Is the str a simple match pattern.
|
||||
|
@ -107,22 +115,25 @@ public class Regex {
|
|||
if (s.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
if ("CASE_INSENSITIVE".equalsIgnoreCase(s)) {
|
||||
s = s.toUpperCase(Locale.ROOT);
|
||||
if ("CASE_INSENSITIVE".equals(s)) {
|
||||
pFlags |= Pattern.CASE_INSENSITIVE;
|
||||
} else if ("MULTILINE".equalsIgnoreCase(s)) {
|
||||
} else if ("MULTILINE".equals(s)) {
|
||||
pFlags |= Pattern.MULTILINE;
|
||||
} else if ("DOTALL".equalsIgnoreCase(s)) {
|
||||
} else if ("DOTALL".equals(s)) {
|
||||
pFlags |= Pattern.DOTALL;
|
||||
} else if ("UNICODE_CASE".equalsIgnoreCase(s)) {
|
||||
} else if ("UNICODE_CASE".equals(s)) {
|
||||
pFlags |= Pattern.UNICODE_CASE;
|
||||
} else if ("CANON_EQ".equalsIgnoreCase(s)) {
|
||||
} else if ("CANON_EQ".equals(s)) {
|
||||
pFlags |= Pattern.CANON_EQ;
|
||||
} else if ("UNIX_LINES".equalsIgnoreCase(s)) {
|
||||
} else if ("UNIX_LINES".equals(s)) {
|
||||
pFlags |= Pattern.UNIX_LINES;
|
||||
} else if ("LITERAL".equalsIgnoreCase(s)) {
|
||||
} else if ("LITERAL".equals(s)) {
|
||||
pFlags |= Pattern.LITERAL;
|
||||
} else if ("COMMENTS".equalsIgnoreCase(s)) {
|
||||
} else if ("COMMENTS".equals(s)) {
|
||||
pFlags |= Pattern.COMMENTS;
|
||||
} else if ("UNICODE_CHAR_CLASS".equals(s)) {
|
||||
pFlags |= UNICODE_CHARACTER_CLASS;
|
||||
} else {
|
||||
throw new ElasticSearchIllegalArgumentException("Unknown regex flag [" + s + "]");
|
||||
}
|
||||
|
@ -155,6 +166,9 @@ public class Regex {
|
|||
}
|
||||
if ((flags & Pattern.COMMENTS) != 0) {
|
||||
sb.append("COMMENTS|");
|
||||
}
|
||||
if ((flags & UNICODE_CHARACTER_CLASS) != 0) {
|
||||
sb.append("UNICODE_CHAR_CLASS|");
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.test.unit.common.regex;
|
||||
|
||||
import java.util.Random;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
public class RegexTests {
|
||||
|
||||
@Test
|
||||
public void testFlags() {
|
||||
String[] supportedFlags = new String[] { "CASE_INSENSITIVE", "MULTILINE", "DOTALL", "UNICODE_CASE", "CANON_EQ", "UNIX_LINES",
|
||||
"LITERAL", "COMMENTS", "UNICODE_CHAR_CLASS" };
|
||||
int[] flags = new int[] { Pattern.CASE_INSENSITIVE, Pattern.MULTILINE, Pattern.DOTALL, Pattern.UNICODE_CASE, Pattern.CANON_EQ,
|
||||
Pattern.UNIX_LINES, Pattern.LITERAL, Pattern.COMMENTS, Regex.UNICODE_CHARACTER_CLASS };
|
||||
long seed = System.currentTimeMillis();
|
||||
Random random = new Random(seed);
|
||||
int num = 10 + random.nextInt(100);
|
||||
for (int i = 0; i < num; i++) {
|
||||
int numFlags = random.nextInt(flags.length+1);
|
||||
int current = 0;
|
||||
StringBuilder builder = new StringBuilder();
|
||||
for (int j = 0; j < numFlags; j++) {
|
||||
int index = random.nextInt(flags.length);
|
||||
current |= flags[index];
|
||||
builder.append(supportedFlags[index]);
|
||||
if (j < numFlags-1) {
|
||||
builder.append("|");
|
||||
}
|
||||
}
|
||||
String flagsToString = Regex.flagsToString(current);
|
||||
assertThat(Regex.flagsFromString(builder.toString()), equalTo(current));
|
||||
assertThat(Regex.flagsFromString(builder.toString()), equalTo(Regex.flagsFromString(flagsToString)));
|
||||
Pattern.compile("\\w\\d{1,2}", current); // accepts the flags?
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue