Added UNICODE_CHARACTER_CLASS support to Regex flags. This flag is only supported from Java 7 on and is ignored if set on a Java 6 JVM.
Closes #2895
This commit is contained in:
parent
eb21526552
commit
30f9f278c3
|
@ -22,12 +22,20 @@ package org.elasticsearch.common.regex;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
import org.elasticsearch.common.Strings;
|
import org.elasticsearch.common.Strings;
|
||||||
|
|
||||||
|
import java.util.Locale;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class Regex {
|
public class Regex {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This Regex / {@link Pattern} flag is supported from Java 7 on.
|
||||||
|
* If set on a Java 6 JVM, the flag will be ignored.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public static final int UNICODE_CHARACTER_CLASS = 0x100; // supported in JAVA7
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is the str a simple match pattern.
|
* Is the str a simple match pattern.
|
||||||
|
@ -107,22 +115,25 @@ public class Regex {
|
||||||
if (s.isEmpty()) {
|
if (s.isEmpty()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ("CASE_INSENSITIVE".equalsIgnoreCase(s)) {
|
s = s.toUpperCase(Locale.ROOT);
|
||||||
|
if ("CASE_INSENSITIVE".equals(s)) {
|
||||||
pFlags |= Pattern.CASE_INSENSITIVE;
|
pFlags |= Pattern.CASE_INSENSITIVE;
|
||||||
} else if ("MULTILINE".equalsIgnoreCase(s)) {
|
} else if ("MULTILINE".equals(s)) {
|
||||||
pFlags |= Pattern.MULTILINE;
|
pFlags |= Pattern.MULTILINE;
|
||||||
} else if ("DOTALL".equalsIgnoreCase(s)) {
|
} else if ("DOTALL".equals(s)) {
|
||||||
pFlags |= Pattern.DOTALL;
|
pFlags |= Pattern.DOTALL;
|
||||||
} else if ("UNICODE_CASE".equalsIgnoreCase(s)) {
|
} else if ("UNICODE_CASE".equals(s)) {
|
||||||
pFlags |= Pattern.UNICODE_CASE;
|
pFlags |= Pattern.UNICODE_CASE;
|
||||||
} else if ("CANON_EQ".equalsIgnoreCase(s)) {
|
} else if ("CANON_EQ".equals(s)) {
|
||||||
pFlags |= Pattern.CANON_EQ;
|
pFlags |= Pattern.CANON_EQ;
|
||||||
} else if ("UNIX_LINES".equalsIgnoreCase(s)) {
|
} else if ("UNIX_LINES".equals(s)) {
|
||||||
pFlags |= Pattern.UNIX_LINES;
|
pFlags |= Pattern.UNIX_LINES;
|
||||||
} else if ("LITERAL".equalsIgnoreCase(s)) {
|
} else if ("LITERAL".equals(s)) {
|
||||||
pFlags |= Pattern.LITERAL;
|
pFlags |= Pattern.LITERAL;
|
||||||
} else if ("COMMENTS".equalsIgnoreCase(s)) {
|
} else if ("COMMENTS".equals(s)) {
|
||||||
pFlags |= Pattern.COMMENTS;
|
pFlags |= Pattern.COMMENTS;
|
||||||
|
} else if ("UNICODE_CHAR_CLASS".equals(s)) {
|
||||||
|
pFlags |= UNICODE_CHARACTER_CLASS;
|
||||||
} else {
|
} else {
|
||||||
throw new ElasticSearchIllegalArgumentException("Unknown regex flag [" + s + "]");
|
throw new ElasticSearchIllegalArgumentException("Unknown regex flag [" + s + "]");
|
||||||
}
|
}
|
||||||
|
@ -155,6 +166,9 @@ public class Regex {
|
||||||
}
|
}
|
||||||
if ((flags & Pattern.COMMENTS) != 0) {
|
if ((flags & Pattern.COMMENTS) != 0) {
|
||||||
sb.append("COMMENTS|");
|
sb.append("COMMENTS|");
|
||||||
|
}
|
||||||
|
if ((flags & UNICODE_CHARACTER_CLASS) != 0) {
|
||||||
|
sb.append("UNICODE_CHAR_CLASS|");
|
||||||
}
|
}
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Licensed to ElasticSearch and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. ElasticSearch licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.test.unit.common.regex;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.regex.Regex;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import static org.hamcrest.MatcherAssert.assertThat;
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
public class RegexTests {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFlags() {
|
||||||
|
String[] supportedFlags = new String[] { "CASE_INSENSITIVE", "MULTILINE", "DOTALL", "UNICODE_CASE", "CANON_EQ", "UNIX_LINES",
|
||||||
|
"LITERAL", "COMMENTS", "UNICODE_CHAR_CLASS" };
|
||||||
|
int[] flags = new int[] { Pattern.CASE_INSENSITIVE, Pattern.MULTILINE, Pattern.DOTALL, Pattern.UNICODE_CASE, Pattern.CANON_EQ,
|
||||||
|
Pattern.UNIX_LINES, Pattern.LITERAL, Pattern.COMMENTS, Regex.UNICODE_CHARACTER_CLASS };
|
||||||
|
long seed = System.currentTimeMillis();
|
||||||
|
Random random = new Random(seed);
|
||||||
|
int num = 10 + random.nextInt(100);
|
||||||
|
for (int i = 0; i < num; i++) {
|
||||||
|
int numFlags = random.nextInt(flags.length+1);
|
||||||
|
int current = 0;
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
for (int j = 0; j < numFlags; j++) {
|
||||||
|
int index = random.nextInt(flags.length);
|
||||||
|
current |= flags[index];
|
||||||
|
builder.append(supportedFlags[index]);
|
||||||
|
if (j < numFlags-1) {
|
||||||
|
builder.append("|");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
String flagsToString = Regex.flagsToString(current);
|
||||||
|
assertThat(Regex.flagsFromString(builder.toString()), equalTo(current));
|
||||||
|
assertThat(Regex.flagsFromString(builder.toString()), equalTo(Regex.flagsFromString(flagsToString)));
|
||||||
|
Pattern.compile("\\w\\d{1,2}", current); // accepts the flags?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue