LUCENE-1745: allow passing matching flags to the underlying regexp engine

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@799667 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-07-31 17:41:04 +00:00
parent f8b2f0122c
commit 0b0d13dffe
4 changed files with 105 additions and 7 deletions

View File

@ -87,6 +87,14 @@ New features
10. LUCENE-1272: Add get/setBoost to MoreLikeThis. (Jonathan
Leibiusky via Mike McCandless)
11. LUCENE-1745: Added constructors to JakartaRegexpCapabilities and
JavaUtilRegexCapabilities as well as static flags to support
configuring a RegexCapabilities implementation with the
implementation-specific modifier flags. Allows for callers to
customize the RegexQuery using the implementation-specific options
and fine tune how regular expressions are compiled and
matched. (Marc Zampetti zampettim@aim.com via Mike McCandless)
Optimizations
1. LUCENE-1643: Re-use the collation key (RawCollationKey) for

View File

@ -27,9 +27,40 @@ import org.apache.regexp.RegexpTunnel;
*/
public class JakartaRegexpCapabilities implements RegexCapabilities {
private RE regexp;
// Define the flags that are possible. Redefine them here
// to avoid exposign the RE class to the caller.
private int flags = RE.MATCH_NORMAL;
/**
* Flag to specify normal, case-sensitive matching behaviour. This is the default.
*/
public static final int FLAG_MATCH_NORMAL = RE.MATCH_NORMAL;
/**
* Flag to specify that matching should be case-independent (folded)
*/
public static final int FLAG_MATCH_CASEINDEPENDENT = RE.MATCH_CASEINDEPENDENT;
/**
* Contructs a RegexCapabilities with the default MATCH_NORMAL match style.
*/
public JakartaRegexpCapabilities() {}
/**
* Constructs a RegexCapabilities with the provided match flags.
* Multiple flags should be ORed together.
*
* @param flags The matching style
*/
public JakartaRegexpCapabilities(int flags)
{
this.flags = flags;
}
public void compile(String pattern) {
regexp = new RE(pattern);
regexp = new RE(pattern, this.flags);
}
public boolean match(String string) {

View File

@ -28,9 +28,46 @@ import java.util.regex.Pattern;
*/
public class JavaUtilRegexCapabilities implements RegexCapabilities {
private Pattern pattern;
private int flags = 0;
// Define the optional flags from Pattern that can be used.
// Do this here to keep Pattern contained within this class.
public static final int FLAG_CANON_EQ = Pattern.CANON_EQ;
public static final int FLAG_CASE_INSENSITIVE = Pattern.CASE_INSENSITIVE;
public static final int FLAG_COMMENTS = Pattern.COMMENTS;
public static final int FLAG_DOTALL = Pattern.DOTALL;
public static final int FLAG_LITERAL = Pattern.LITERAL;
public static final int FLAG_MULTILINE = Pattern.MULTILINE;
public static final int FLAG_UNICODE_CASE = Pattern.UNICODE_CASE;
public static final int FLAG_UNIX_LINES = Pattern.UNIX_LINES;
/**
* Default constructor that uses java.util.regex.Pattern
* with its default flags.
*/
public JavaUtilRegexCapabilities() {
this.flags = 0;
}
/**
* Constructor that allows for the modification of the flags that
* the java.util.regex.Pattern will use to compile the regular expression.
* This gives the user the ability to fine-tune how the regular expression
* to match the functionlity that they need.
* The {@link java.util.regex.Pattern Pattern} class supports specifying
* these fields via the regular expression text itself, but this gives the caller
* another option to modify the behavior. Useful in cases where the regular expression text
* cannot be modified, or if doing so is undesired.
*
* @flags The flags that are ORed together.
*/
public JavaUtilRegexCapabilities(int flags) {
this.flags = flags;
}
public void compile(String pattern) {
this.pattern = Pattern.compile(pattern);
this.pattern = Pattern.compile(pattern, this.flags);
}
public boolean match(String string) {

View File

@ -33,6 +33,7 @@ public class TestRegexQuery extends TestCase {
private IndexSearcher searcher;
private final String FN = "field";
public void setUp() {
RAMDirectory directory = new RAMDirectory();
try {
@ -59,8 +60,12 @@ public class TestRegexQuery extends TestCase {
private Term newTerm(String value) { return new Term(FN, value); }
private int regexQueryNrHits(String regex) throws Exception {
private int regexQueryNrHits(String regex, RegexCapabilities capability) throws Exception {
RegexQuery query = new RegexQuery( newTerm(regex));
if ( capability != null )
query.setRegexImplementation(capability);
return searcher.search(query).length();
}
@ -68,19 +73,20 @@ public class TestRegexQuery extends TestCase {
SpanRegexQuery srq1 = new SpanRegexQuery( newTerm(regex1));
SpanRegexQuery srq2 = new SpanRegexQuery( newTerm(regex2));
SpanNearQuery query = new SpanNearQuery( new SpanQuery[]{srq1, srq2}, slop, ordered);
return searcher.search(query).length();
}
public void testRegex1() throws Exception {
assertEquals(1, regexQueryNrHits("^q.[aeiou]c.*$"));
assertEquals(1, regexQueryNrHits("^q.[aeiou]c.*$", null));
}
public void testRegex2() throws Exception {
assertEquals(0, regexQueryNrHits("^.[aeiou]c.*$"));
assertEquals(0, regexQueryNrHits("^.[aeiou]c.*$", null));
}
public void testRegex3() throws Exception {
assertEquals(0, regexQueryNrHits("^q.[aeiou]c$"));
assertEquals(0, regexQueryNrHits("^q.[aeiou]c$", null));
}
public void testSpanRegex1() throws Exception {
@ -98,6 +104,22 @@ public class TestRegexQuery extends TestCase {
RegexQuery query2 = new RegexQuery( newTerm("foo.*"));
assertFalse(query1.equals(query2));
}
public void testJakartaCaseSensativeFail() throws Exception {
assertEquals(0, regexQueryNrHits("^.*DOG.*$", null));
}
public void testJavaUtilCaseSensativeFail() throws Exception {
assertEquals(0, regexQueryNrHits("^.*DOG.*$", null));
}
public void testJakartaCaseInsensative() throws Exception {
assertEquals(1, regexQueryNrHits("^.*DOG.*$", new JakartaRegexpCapabilities(JakartaRegexpCapabilities.FLAG_MATCH_CASEINDEPENDENT)));
}
public void testJavaUtilCaseInsensative() throws Exception {
assertEquals(1, regexQueryNrHits("^.*DOG.*$", new JavaUtilRegexCapabilities(JavaUtilRegexCapabilities.FLAG_CASE_INSENSITIVE)));
}
}