mirror of https://github.com/apache/lucene.git
LUCENE-1745: allow passing matching flags to the underlying regexp engine
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@799667 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f8b2f0122c
commit
0b0d13dffe
|
@ -87,6 +87,14 @@ New features
|
|||
10. LUCENE-1272: Add get/setBoost to MoreLikeThis. (Jonathan
|
||||
Leibiusky via Mike McCandless)
|
||||
|
||||
11. LUCENE-1745: Added constructors to JakartaRegexpCapabilities and
|
||||
JavaUtilRegexCapabilities as well as static flags to support
|
||||
configuring a RegexCapabilities implementation with the
|
||||
implementation-specific modifier flags. Allows for callers to
|
||||
customize the RegexQuery using the implementation-specific options
|
||||
and fine tune how regular expressions are compiled and
|
||||
matched. (Marc Zampetti zampettim@aim.com via Mike McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
1. LUCENE-1643: Re-use the collation key (RawCollationKey) for
|
||||
|
|
|
@ -28,8 +28,39 @@ import org.apache.regexp.RegexpTunnel;
|
|||
public class JakartaRegexpCapabilities implements RegexCapabilities {
|
||||
private RE regexp;
|
||||
|
||||
// Define the flags that are possible. Redefine them here
|
||||
// to avoid exposing the RE class to the caller.
|
||||
|
||||
private int flags = RE.MATCH_NORMAL;
|
||||
|
||||
/**
|
||||
* Flag to specify normal, case-sensitive matching behaviour. This is the default.
|
||||
*/
|
||||
public static final int FLAG_MATCH_NORMAL = RE.MATCH_NORMAL;
|
||||
|
||||
/**
|
||||
* Flag to specify that matching should be case-independent (folded)
|
||||
*/
|
||||
public static final int FLAG_MATCH_CASEINDEPENDENT = RE.MATCH_CASEINDEPENDENT;
|
||||
|
||||
/**
|
||||
* Constructs a RegexCapabilities with the default MATCH_NORMAL match style.
|
||||
*/
|
||||
public JakartaRegexpCapabilities() {}
|
||||
|
||||
/**
|
||||
* Constructs a RegexCapabilities with the provided match flags.
|
||||
* Multiple flags should be ORed together.
|
||||
*
|
||||
* @param flags The matching style
|
||||
*/
|
||||
public JakartaRegexpCapabilities(int flags)
|
||||
{
|
||||
this.flags = flags;
|
||||
}
|
||||
|
||||
public void compile(String pattern) {
|
||||
regexp = new RE(pattern);
|
||||
regexp = new RE(pattern, this.flags);
|
||||
}
|
||||
|
||||
public boolean match(String string) {
|
||||
|
|
|
@ -28,9 +28,46 @@ import java.util.regex.Pattern;
|
|||
*/
|
||||
public class JavaUtilRegexCapabilities implements RegexCapabilities {
|
||||
private Pattern pattern;
|
||||
private int flags = 0;
|
||||
|
||||
// Define the optional flags from Pattern that can be used.
|
||||
// Do this here to keep Pattern contained within this class.
|
||||
|
||||
public static final int FLAG_CANON_EQ = Pattern.CANON_EQ;
|
||||
public static final int FLAG_CASE_INSENSITIVE = Pattern.CASE_INSENSITIVE;
|
||||
public static final int FLAG_COMMENTS = Pattern.COMMENTS;
|
||||
public static final int FLAG_DOTALL = Pattern.DOTALL;
|
||||
public static final int FLAG_LITERAL = Pattern.LITERAL;
|
||||
public static final int FLAG_MULTILINE = Pattern.MULTILINE;
|
||||
public static final int FLAG_UNICODE_CASE = Pattern.UNICODE_CASE;
|
||||
public static final int FLAG_UNIX_LINES = Pattern.UNIX_LINES;
|
||||
|
||||
/**
|
||||
* Default constructor that uses java.util.regex.Pattern
|
||||
* with its default flags.
|
||||
*/
|
||||
public JavaUtilRegexCapabilities() {
|
||||
this.flags = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor that allows for the modification of the flags that
|
||||
* the java.util.regex.Pattern will use to compile the regular expression.
|
||||
* This gives the user the ability to fine-tune how the regular expression
|
||||
* is compiled, to match the functionality that they need.
|
||||
* The {@link java.util.regex.Pattern Pattern} class supports specifying
|
||||
* these fields via the regular expression text itself, but this gives the caller
|
||||
* another option to modify the behavior. Useful in cases where the regular expression text
|
||||
* cannot be modified, or if doing so is undesired.
|
||||
*
|
||||
* @param flags The flags that are ORed together.
|
||||
*/
|
||||
public JavaUtilRegexCapabilities(int flags) {
|
||||
this.flags = flags;
|
||||
}
|
||||
|
||||
public void compile(String pattern) {
|
||||
this.pattern = Pattern.compile(pattern);
|
||||
this.pattern = Pattern.compile(pattern, this.flags);
|
||||
}
|
||||
|
||||
public boolean match(String string) {
|
||||
|
|
|
@ -33,6 +33,7 @@ public class TestRegexQuery extends TestCase {
|
|||
private IndexSearcher searcher;
|
||||
private final String FN = "field";
|
||||
|
||||
|
||||
public void setUp() {
|
||||
RAMDirectory directory = new RAMDirectory();
|
||||
try {
|
||||
|
@ -59,8 +60,12 @@ public class TestRegexQuery extends TestCase {
|
|||
|
||||
private Term newTerm(String value) { return new Term(FN, value); }
|
||||
|
||||
private int regexQueryNrHits(String regex) throws Exception {
|
||||
private int regexQueryNrHits(String regex, RegexCapabilities capability) throws Exception {
|
||||
RegexQuery query = new RegexQuery( newTerm(regex));
|
||||
|
||||
if ( capability != null )
|
||||
query.setRegexImplementation(capability);
|
||||
|
||||
return searcher.search(query).length();
|
||||
}
|
||||
|
||||
|
@ -68,19 +73,20 @@ public class TestRegexQuery extends TestCase {
|
|||
SpanRegexQuery srq1 = new SpanRegexQuery( newTerm(regex1));
|
||||
SpanRegexQuery srq2 = new SpanRegexQuery( newTerm(regex2));
|
||||
SpanNearQuery query = new SpanNearQuery( new SpanQuery[]{srq1, srq2}, slop, ordered);
|
||||
|
||||
return searcher.search(query).length();
|
||||
}
|
||||
|
||||
public void testRegex1() throws Exception {
|
||||
assertEquals(1, regexQueryNrHits("^q.[aeiou]c.*$"));
|
||||
assertEquals(1, regexQueryNrHits("^q.[aeiou]c.*$", null));
|
||||
}
|
||||
|
||||
public void testRegex2() throws Exception {
|
||||
assertEquals(0, regexQueryNrHits("^.[aeiou]c.*$"));
|
||||
assertEquals(0, regexQueryNrHits("^.[aeiou]c.*$", null));
|
||||
}
|
||||
|
||||
public void testRegex3() throws Exception {
|
||||
assertEquals(0, regexQueryNrHits("^q.[aeiou]c$"));
|
||||
assertEquals(0, regexQueryNrHits("^q.[aeiou]c$", null));
|
||||
}
|
||||
|
||||
public void testSpanRegex1() throws Exception {
|
||||
|
@ -99,5 +105,21 @@ public class TestRegexQuery extends TestCase {
|
|||
assertFalse(query1.equals(query2));
|
||||
}
|
||||
|
||||
public void testJakartaCaseSensativeFail() throws Exception {
|
||||
assertEquals(0, regexQueryNrHits("^.*DOG.*$", null));
|
||||
}
|
||||
|
||||
public void testJavaUtilCaseSensativeFail() throws Exception {
|
||||
assertEquals(0, regexQueryNrHits("^.*DOG.*$", null));
|
||||
}
|
||||
|
||||
public void testJakartaCaseInsensative() throws Exception {
|
||||
assertEquals(1, regexQueryNrHits("^.*DOG.*$", new JakartaRegexpCapabilities(JakartaRegexpCapabilities.FLAG_MATCH_CASEINDEPENDENT)));
|
||||
}
|
||||
|
||||
public void testJavaUtilCaseInsensative() throws Exception {
|
||||
assertEquals(1, regexQueryNrHits("^.*DOG.*$", new JavaUtilRegexCapabilities(JavaUtilRegexCapabilities.FLAG_CASE_INSENSITIVE)));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue