diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 501f60970b6..140fd3850fc 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -131,6 +131,10 @@ API Changes New features +* LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions + are directly supported by the standard queryparser. + (Simon Willnauer, Robert Muir) + * LUCENE-1606, LUCENE-2089: Adds AutomatonQuery, a MultiTermQuery that matches terms against a finite-state machine. Implement WildcardQuery and FuzzyQuery with finite-state methods. Adds RegexpQuery. diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index f608c02d318..76fe37ed72c 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -9,6 +9,9 @@ Build New Features + * LUCENE-2604: Added RegexpQuery support to contrib/queryparser. + (Simon Willnauer, Robert Muir) + * LUCENE-2500: Added DirectIOLinuxDirectory, a Linux-specific Directory impl that uses the O_DIRECT flag to bypass the buffer cache. This is useful to prevent segment merging from evicting diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java index 9a84eb5ba75..ca370ca85bd 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java @@ -109,4 +109,4 @@ public interface CharStream { void Done(); } -/* JavaCC - OriginalChecksum=8cc617b193267dc876ef9699367c8186 (do not edit this line) */ +/* JavaCC - OriginalChecksum=7bcd45d10a032f1c9da64691d073cf75 (do not edit this line) */ diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java index 6e9ec487912..3f197ceb943 100644 --- 
a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java @@ -195,4 +195,4 @@ public class ParseException extends Exception { } } -/* JavaCC - OriginalChecksum=15fbbe38a36c8ac9e2740d030624c321 (do not edit this line) */ +/* JavaCC - OriginalChecksum=4440e368eeef562faffeca98a200334b (do not edit this line) */ diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java index 12a510b782f..70440427d07 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java @@ -17,9 +17,11 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermQuery; @@ -95,6 +97,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { private Operator operator = OR_OPERATOR; boolean lowercaseExpandedTerms = true; + MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; Analyzer analyzer; String field; @@ -232,6 +235,27 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { public boolean getLowercaseExpandedTerms() { return lowercaseExpandedTerms; } + /** + * 
By default PrecedenceQueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it + * a) Runs faster b) Does not have the scarcity of terms unduly influence score + * c) avoids any "TooManyBooleanClauses" exception. + * However, if your application really needs to use the + * old-fashioned BooleanQuery expansion rewriting and the above + * points are not relevant then use this to change + * the rewrite method. + */ + public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) { + multiTermRewriteMethod = method; + } + + + /** + * @see #setMultiTermRewriteMethod + */ + public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() { + return multiTermRewriteMethod; + } /** * Set locale used by date range parsing. @@ -426,7 +450,9 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { } catch (Exception e) { } - return new TermRangeQuery(field, part1, part2, inclusive, inclusive); + final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive); + query.setRewriteMethod(multiTermRewriteMethod); + return query; } /** @@ -500,7 +526,9 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { termStr = termStr.toLowerCase(); } Term t = new Term(field, termStr); - return new WildcardQuery(t); + final WildcardQuery query = new WildcardQuery(t); + query.setRewriteMethod(multiTermRewriteMethod); + return query; } /** @@ -532,7 +560,40 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { termStr = termStr.toLowerCase(); } Term t = new Term(field, termStr); - return new PrefixQuery(t); + final PrefixQuery query = new PrefixQuery(t); + query.setRewriteMethod(multiTermRewriteMethod); + return query; + } + + /** + * Factory method for generating a query. 
Called when parser + * parses an input term token that contains a regular expression + * query. + *
<p>
+ * Depending on settings, pattern term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with regular expression templates. + *
<p>
+ * Can be overridden by extending classes, to provide custom handling for + * regular expression queries, which may be necessary due to missing analyzer + * calls. + * + * @param field Name of the field query will use. + * @param termStr Term token that contains a regular expression + * + * @return Resulting {@link Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getRegexpQuery(String field, String termStr) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + final Term regexp = new Term(field, termStr); + final RegexpQuery query = new RegexpQuery(regexp); + query.setRewriteMethod(multiTermRewriteMethod); + return query; } /** @@ -675,6 +736,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { case TERM: case PREFIXTERM: case WILDTERM: + case REGEXPTERM: case RANGEIN_START: case RANGEEX_START: case NUMBER: @@ -750,6 +812,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { case TERM: case PREFIXTERM: case WILDTERM: + case REGEXPTERM: case RANGEIN_START: case RANGEEX_START: case NUMBER: @@ -790,11 +853,14 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; + boolean regexp = false; + Query q; switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case TERM: case PREFIXTERM: case WILDTERM: + case REGEXPTERM: case NUMBER: switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case TERM: @@ -808,6 +874,10 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { term = jj_consume_token(WILDTERM); wildcard=true; break; + case REGEXPTERM: + term = jj_consume_token(REGEXPTERM); + regexp=true; + break; case NUMBER: term = jj_consume_token(NUMBER); break; @@ -850,6 +920,8 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { q = getPrefixQuery(field, 
discardEscapeChar(term.image.substring (0, term.image.length()-1))); + } else if (regexp) { + q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1)); } else if (fuzzy) { float fms = fuzzyMinSim; try { @@ -1055,11 +1127,16 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { private int jj_gen; final private int[] jj_la1 = new int[24]; static private int[] jj_la1_0; + static private int[] jj_la1_1; static { jj_la1_init_0(); + jj_la1_init_1(); } private static void jj_la1_init_0() { - jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0xfb1f00,0x100,0x80,0x8000,0xfb1000,0x9a0000,0x40000,0x40000,0x8000,0xc000000,0x1000000,0xc000000,0x8000,0xc0000000,0x10000000,0xc0000000,0x8000,0x40000,0x8000,0xfb0000,}; + jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0x1fb1f00,0x100,0x80,0x8000,0x1fb1000,0x13a0000,0x40000,0x40000,0x8000,0x18000000,0x2000000,0x18000000,0x8000,0x80000000,0x20000000,0x80000000,0x8000,0x40000,0x8000,0x1fb0000,}; + } + private static void jj_la1_init_1() { + jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,}; } final private JJCalls[] jj_2_rtns = new JJCalls[1]; private boolean jj_rescan = false; @@ -1213,7 +1290,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { /** Generate ParseException. */ public ParseException generateParseException() { jj_expentries.clear(); - boolean[] la1tokens = new boolean[32]; + boolean[] la1tokens = new boolean[33]; if (jj_kind >= 0) { la1tokens[jj_kind] = true; jj_kind = -1; @@ -1224,10 +1301,13 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { if ((jj_la1_0[i] & (1< + * Depending on settings, pattern term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with regular expression templates. + *
<p>
+ * Can be overridden by extending classes, to provide custom handling for + * regular expression queries, which may be necessary due to missing analyzer + * calls. + * + * @param field Name of the field query will use. + * @param termStr Term token that contains a regular expression + * + * @return Resulting {@link Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getRegexpQuery(String field, String termStr) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + final Term regexp = new Term(field, termStr); + final RegexpQuery query = new RegexpQuery(regexp); + query.setRewriteMethod(multiTermRewriteMethod); + return query; } /** @@ -678,6 +739,7 @@ PARSER_END(PrecedenceQueryParser) | (<_TERM_CHAR>)* "*" > | (<_TERM_CHAR> | ( [ "*", "?" ] ))* > +| | : RangeIn | : RangeEx } @@ -813,6 +875,8 @@ Query Term(String field) : { boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; + boolean regexp = false; + Query q; } { @@ -821,6 +885,7 @@ Query Term(String field) : { term= | term= { prefix=true; } | term= { wildcard=true; } + | term= { regexp=true; } | term= ) [ fuzzySlop= { fuzzy=true; } ] @@ -833,6 +898,8 @@ Query Term(String field) : { q = getPrefixQuery(field, discardEscapeChar(term.image.substring (0, term.image.length()-1))); + } else if (regexp) { + q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1)); } else if (fuzzy) { float fms = fuzzyMinSim; try { diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java index 50c55bd19bb..be8a0ffda92 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java +++ 
b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java @@ -49,27 +49,29 @@ public interface PrecedenceQueryParserConstants { /** RegularExpression Id. */ int WILDTERM = 20; /** RegularExpression Id. */ - int RANGEIN_START = 21; + int REGEXPTERM = 21; /** RegularExpression Id. */ - int RANGEEX_START = 22; + int RANGEIN_START = 22; /** RegularExpression Id. */ - int NUMBER = 23; + int RANGEEX_START = 23; /** RegularExpression Id. */ - int RANGEIN_TO = 24; + int NUMBER = 24; /** RegularExpression Id. */ - int RANGEIN_END = 25; + int RANGEIN_TO = 25; /** RegularExpression Id. */ - int RANGEIN_QUOTED = 26; + int RANGEIN_END = 26; /** RegularExpression Id. */ - int RANGEIN_GOOP = 27; + int RANGEIN_QUOTED = 27; /** RegularExpression Id. */ - int RANGEEX_TO = 28; + int RANGEIN_GOOP = 28; /** RegularExpression Id. */ - int RANGEEX_END = 29; + int RANGEEX_TO = 29; /** RegularExpression Id. */ - int RANGEEX_QUOTED = 30; + int RANGEEX_END = 30; /** RegularExpression Id. */ - int RANGEEX_GOOP = 31; + int RANGEEX_QUOTED = 31; + /** RegularExpression Id. */ + int RANGEEX_GOOP = 32; /** Lexical state. 
*/ int Boost = 0; @@ -103,6 +105,7 @@ public interface PrecedenceQueryParserConstants { "", "", "", + "", "\"[\"", "\"{\"", "", diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java index cef303844ae..4f3ff70994a 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java @@ -15,9 +15,11 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermQuery; @@ -66,11 +68,11 @@ private int jjMoveStringLiteralDfa0_3() case 58: return jjStopAtPos(0, 14); case 91: - return jjStopAtPos(0, 21); + return jjStopAtPos(0, 22); case 94: return jjStopAtPos(0, 15); case 123: - return jjStopAtPos(0, 22); + return jjStopAtPos(0, 23); default : return jjMoveNfa_3(0, 0); } @@ -84,7 +86,7 @@ static final long[] jjbitVec2 = { private int jjMoveNfa_3(int startState, int curPos) { int startsAt = 0; - jjnewStateCnt = 33; + jjnewStateCnt = 38; int i = 1; jjstateSet[0] = startState; int kind = 0x7fffffff; @@ -118,7 +120,9 @@ private int jjMoveNfa_3(int startState, int curPos) if (kind > 9) kind = 9; } - if (curChar == 38) + if (curChar == 47) + jjCheckNAddStates(7, 9); + else if (curChar == 38) jjstateSet[jjnewStateCnt++] = 4; break; case 4: @@ 
-150,7 +154,7 @@ private int jjMoveNfa_3(int startState, int curPos) break; if (kind > 18) kind = 18; - jjAddStates(7, 8); + jjAddStates(10, 11); break; case 19: if (curChar == 46) @@ -164,51 +168,64 @@ private int jjMoveNfa_3(int startState, int curPos) jjCheckNAdd(20); break; case 21: + case 23: + if (curChar == 47) + jjCheckNAddStates(7, 9); + break; + case 22: + if ((0xffff7fffffffffffL & l) != 0L) + jjCheckNAddStates(7, 9); + break; + case 25: + if (curChar == 47 && kind > 21) + kind = 21; + break; + case 26: if ((0x7bffd0f8ffffd9ffL & l) == 0L) break; if (kind > 17) kind = 17; jjCheckNAddStates(0, 6); break; - case 22: + case 27: if ((0x7bfff8f8ffffd9ffL & l) == 0L) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(22, 23); + jjCheckNAddTwoStates(27, 28); break; - case 24: + case 29: if ((0x84002f0600000000L & l) == 0L) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(22, 23); + jjCheckNAddTwoStates(27, 28); break; - case 25: + case 30: if ((0x7bfff8f8ffffd9ffL & l) != 0L) - jjCheckNAddStates(9, 11); + jjCheckNAddStates(12, 14); break; - case 26: + case 31: if (curChar == 42 && kind > 19) kind = 19; break; - case 28: + case 33: if ((0x84002f0600000000L & l) != 0L) - jjCheckNAddStates(9, 11); + jjCheckNAddStates(12, 14); break; - case 29: + case 34: if ((0xfbfffcf8ffffd9ffL & l) == 0L) break; if (kind > 20) kind = 20; - jjCheckNAddTwoStates(29, 30); + jjCheckNAddTwoStates(34, 35); break; - case 31: + case 36: if ((0x84002f0600000000L & l) == 0L) break; if (kind > 20) kind = 20; - jjCheckNAddTwoStates(29, 30); + jjCheckNAddTwoStates(34, 35); break; default : break; } @@ -235,7 +252,7 @@ private int jjMoveNfa_3(int startState, int curPos) jjstateSet[jjnewStateCnt++] = 18; } if (curChar == 92) - jjCheckNAddStates(12, 14); + jjCheckNAddStates(15, 17); else if (curChar == 78) jjstateSet[jjnewStateCnt++] = 11; else if (curChar == 124) @@ -286,7 +303,7 @@ private int jjMoveNfa_3(int startState, int curPos) jjstateSet[jjnewStateCnt++] = 11; break; case 
15: - jjAddStates(15, 16); + jjAddStates(18, 19); break; case 17: if (curChar != 126) @@ -295,65 +312,72 @@ private int jjMoveNfa_3(int startState, int curPos) kind = 18; jjstateSet[jjnewStateCnt++] = 18; break; - case 21: + case 22: + jjAddStates(7, 9); + break; + case 24: + if (curChar == 92) + jjstateSet[jjnewStateCnt++] = 23; + break; + case 26: if ((0x97ffffff97ffffffL & l) == 0L) break; if (kind > 17) kind = 17; jjCheckNAddStates(0, 6); break; - case 22: + case 27: if ((0x97ffffff97ffffffL & l) == 0L) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(22, 23); - break; - case 23: - if (curChar == 92) - jjCheckNAddTwoStates(24, 24); - break; - case 24: - if ((0x6800000078000000L & l) == 0L) - break; - if (kind > 17) - kind = 17; - jjCheckNAddTwoStates(22, 23); - break; - case 25: - if ((0x97ffffff97ffffffL & l) != 0L) - jjCheckNAddStates(9, 11); - break; - case 27: - if (curChar == 92) - jjCheckNAddTwoStates(28, 28); + jjCheckNAddTwoStates(27, 28); break; case 28: - if ((0x6800000078000000L & l) != 0L) - jjCheckNAddStates(9, 11); + if (curChar == 92) + jjCheckNAddTwoStates(29, 29); break; case 29: - if ((0x97ffffff97ffffffL & l) == 0L) - break; - if (kind > 20) - kind = 20; - jjCheckNAddTwoStates(29, 30); - break; - case 30: - if (curChar == 92) - jjCheckNAddTwoStates(31, 31); - break; - case 31: if ((0x6800000078000000L & l) == 0L) break; - if (kind > 20) - kind = 20; - jjCheckNAddTwoStates(29, 30); + if (kind > 17) + kind = 17; + jjCheckNAddTwoStates(27, 28); + break; + case 30: + if ((0x97ffffff97ffffffL & l) != 0L) + jjCheckNAddStates(12, 14); break; case 32: if (curChar == 92) + jjCheckNAddTwoStates(33, 33); + break; + case 33: + if ((0x6800000078000000L & l) != 0L) jjCheckNAddStates(12, 14); break; + case 34: + if ((0x97ffffff97ffffffL & l) == 0L) + break; + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(34, 35); + break; + case 35: + if (curChar == 92) + jjCheckNAddTwoStates(36, 36); + break; + case 36: + if ((0x6800000078000000L & l) == 0L) + 
break; + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(34, 35); + break; + case 37: + if (curChar == 92) + jjCheckNAddStates(15, 17); + break; default : break; } } while(i != startsAt); @@ -378,25 +402,29 @@ private int jjMoveNfa_3(int startState, int curPos) break; case 15: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(15, 16); + jjAddStates(18, 19); break; case 22: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + jjAddStates(7, 9); + break; + case 27: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(22, 23); + jjCheckNAddTwoStates(27, 28); break; - case 25: + case 30: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(9, 11); + jjCheckNAddStates(12, 14); break; - case 29: + case 34: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; if (kind > 20) kind = 20; - jjCheckNAddTwoStates(29, 30); + jjCheckNAddTwoStates(34, 35); break; default : break; } @@ -409,7 +437,7 @@ private int jjMoveNfa_3(int startState, int curPos) kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 38 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { return curPos; } @@ -420,9 +448,9 @@ private final int jjStopStringLiteralDfa_1(int pos, long active0) switch (pos) { case 0: - if ((active0 & 0x10000000L) != 0L) + if ((active0 & 0x20000000L) != 0L) { - jjmatchedKind = 31; + jjmatchedKind = 32; return 4; } return -1; @@ -439,9 +467,9 @@ private int jjMoveStringLiteralDfa0_1() switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_1(0x10000000L); + return jjMoveStringLiteralDfa1_1(0x20000000L); case 125: - return jjStopAtPos(0, 29); + return jjStopAtPos(0, 30); default : return jjMoveNfa_1(0, 0); } @@ -456,8 +484,8 @@ private int jjMoveStringLiteralDfa1_1(long active0) switch(curChar) { case 79: - if ((active0 & 0x10000000L) != 0L) - return 
jjStartNfaWithStates_1(1, 28, 4); + if ((active0 & 0x20000000L) != 0L) + return jjStartNfaWithStates_1(1, 29, 4); break; default : break; @@ -493,8 +521,8 @@ private int jjMoveNfa_1(int startState, int curPos) case 0: if ((0xfffffffeffffffffL & l) != 0L) { - if (kind > 31) - kind = 31; + if (kind > 32) + kind = 32; jjCheckNAdd(4); } if ((0x100002600L & l) != 0L) @@ -514,14 +542,14 @@ private int jjMoveNfa_1(int startState, int curPos) jjCheckNAddTwoStates(2, 3); break; case 3: - if (curChar == 34 && kind > 30) - kind = 30; + if (curChar == 34 && kind > 31) + kind = 31; break; case 4: if ((0xfffffffeffffffffL & l) == 0L) break; - if (kind > 31) - kind = 31; + if (kind > 32) + kind = 32; jjCheckNAdd(4); break; default : break; @@ -539,12 +567,12 @@ private int jjMoveNfa_1(int startState, int curPos) case 4: if ((0xdfffffffffffffffL & l) == 0L) break; - if (kind > 31) - kind = 31; + if (kind > 32) + kind = 32; jjCheckNAdd(4); break; case 2: - jjAddStates(17, 18); + jjAddStates(20, 21); break; default : break; } @@ -565,13 +593,13 @@ private int jjMoveNfa_1(int startState, int curPos) case 4: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; - if (kind > 31) - kind = 31; + if (kind > 32) + kind = 32; jjCheckNAdd(4); break; case 2: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(17, 18); + jjAddStates(20, 21); break; default : break; } @@ -615,9 +643,9 @@ private int jjMoveNfa_0(int startState, int curPos) case 0: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 23) - kind = 23; - jjAddStates(19, 20); + if (kind > 24) + kind = 24; + jjAddStates(22, 23); break; case 1: if (curChar == 46) @@ -626,8 +654,8 @@ private int jjMoveNfa_0(int startState, int curPos) case 2: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 23) - kind = 23; + if (kind > 24) + kind = 24; jjCheckNAdd(2); break; default : break; @@ -678,9 +706,9 @@ private final int jjStopStringLiteralDfa_2(int pos, long active0) switch (pos) { case 0: - if ((active0 & 0x1000000L) != 0L) + if 
((active0 & 0x2000000L) != 0L) { - jjmatchedKind = 27; + jjmatchedKind = 28; return 4; } return -1; @@ -697,9 +725,9 @@ private int jjMoveStringLiteralDfa0_2() switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_2(0x1000000L); + return jjMoveStringLiteralDfa1_2(0x2000000L); case 93: - return jjStopAtPos(0, 25); + return jjStopAtPos(0, 26); default : return jjMoveNfa_2(0, 0); } @@ -714,8 +742,8 @@ private int jjMoveStringLiteralDfa1_2(long active0) switch(curChar) { case 79: - if ((active0 & 0x1000000L) != 0L) - return jjStartNfaWithStates_2(1, 24, 4); + if ((active0 & 0x2000000L) != 0L) + return jjStartNfaWithStates_2(1, 25, 4); break; default : break; @@ -751,8 +779,8 @@ private int jjMoveNfa_2(int startState, int curPos) case 0: if ((0xfffffffeffffffffL & l) != 0L) { - if (kind > 27) - kind = 27; + if (kind > 28) + kind = 28; jjCheckNAdd(4); } if ((0x100002600L & l) != 0L) @@ -772,14 +800,14 @@ private int jjMoveNfa_2(int startState, int curPos) jjCheckNAddTwoStates(2, 3); break; case 3: - if (curChar == 34 && kind > 26) - kind = 26; + if (curChar == 34 && kind > 27) + kind = 27; break; case 4: if ((0xfffffffeffffffffL & l) == 0L) break; - if (kind > 27) - kind = 27; + if (kind > 28) + kind = 28; jjCheckNAdd(4); break; default : break; @@ -797,12 +825,12 @@ private int jjMoveNfa_2(int startState, int curPos) case 4: if ((0xffffffffdfffffffL & l) == 0L) break; - if (kind > 27) - kind = 27; + if (kind > 28) + kind = 28; jjCheckNAdd(4); break; case 2: - jjAddStates(17, 18); + jjAddStates(20, 21); break; default : break; } @@ -823,13 +851,13 @@ private int jjMoveNfa_2(int startState, int curPos) case 4: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; - if (kind > 27) - kind = 27; + if (kind > 28) + kind = 28; jjCheckNAdd(4); break; case 2: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(17, 18); + jjAddStates(20, 21); break; default : break; } @@ -849,8 +877,8 @@ private int jjMoveNfa_2(int startState, int curPos) } } static final int[] jjnextStates = 
{ - 22, 25, 26, 29, 30, 27, 23, 18, 19, 25, 26, 27, 24, 28, 31, 15, - 16, 2, 3, 0, 1, + 27, 30, 31, 34, 35, 32, 28, 22, 24, 25, 18, 19, 30, 31, 32, 29, + 33, 36, 15, 16, 2, 3, 0, 1, }; private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) { @@ -868,8 +896,8 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo /** Token literal values. */ public static final String[] jjstrLiteralImages = { "", null, null, null, null, null, null, null, null, null, "\53", "\55", "\50", -"\51", "\72", "\136", null, null, null, null, null, "\133", "\173", null, "\124\117", -"\135", null, null, "\124\117", "\175", null, null, }; +"\51", "\72", "\136", null, null, null, null, null, null, "\133", "\173", null, +"\124\117", "\135", null, null, "\124\117", "\175", null, null, }; /** Lexer state names. */ public static final String[] lexStateNames = { @@ -881,18 +909,18 @@ public static final String[] lexStateNames = { /** Lex State array. */ public static final int[] jjnewLexState = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 3, -1, - 3, -1, -1, -1, 3, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, 2, 1, 3, + -1, 3, -1, -1, -1, 3, -1, -1, }; static final long[] jjtoToken = { - 0xffffff81L, + 0x1ffffff81L, }; static final long[] jjtoSkip = { 0x40L, }; protected CharStream input_stream; -private final int[] jjrounds = new int[33]; -private final int[] jjstateSet = new int[66]; +private final int[] jjrounds = new int[38]; +private final int[] jjstateSet = new int[76]; protected char curChar; /** Constructor. 
*/ public PrecedenceQueryParserTokenManager(CharStream stream){ @@ -917,7 +945,7 @@ private void ReInitRounds() { int i; jjround = 0x80000001; - for (i = 33; i-- > 0;) + for (i = 38; i-- > 0;) jjrounds[i] = 0x80000000; } diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java index 8402b3d5017..7383a35eefb 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java @@ -121,4 +121,4 @@ public class Token { } } -/* JavaCC - OriginalChecksum=0dc5808f2ab8aac8775ea9175fa2cb51 (do not edit this line) */ +/* JavaCC - OriginalChecksum=bc9495ddfa3189061fb4f1bf3c4f64e2 (do not edit this line) */ diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java index 01e87510c8f..e29d561af23 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java @@ -138,4 +138,4 @@ public class TokenMgrError extends Error this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); } } -/* JavaCC - OriginalChecksum=257b82f2650841e86289a309cb3dae76 (do not edit this line) */ +/* JavaCC - OriginalChecksum=e01667f2eb6d0b2f1fbb6958df0ca751 (do not edit this line) */ diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RegexpQueryNodeBuilder.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RegexpQueryNodeBuilder.java new file mode 100644 index 00000000000..e035b19836b --- /dev/null +++ 
b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/RegexpQueryNodeBuilder.java @@ -0,0 +1,52 @@ +package org.apache.lucene.queryParser.standard.builders; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.core.QueryNodeException; +import org.apache.lucene.queryParser.core.nodes.QueryNode; +import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute; +import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.RegexpQuery; + +/** + * Builds a {@link RegexpQuery} object from a {@link RegexpQueryNode} object. 
+ */ +public class RegexpQueryNodeBuilder implements StandardQueryBuilder { + + public RegexpQueryNodeBuilder() { + // empty constructor + } + + public RegexpQuery build(QueryNode queryNode) throws QueryNodeException { + RegexpQueryNode regexpNode = (RegexpQueryNode) queryNode; + + RegexpQuery q = new RegexpQuery(new Term(regexpNode.getFieldAsString(), + regexpNode.textToBytesRef())); + + MultiTermQuery.RewriteMethod method = (MultiTermQuery.RewriteMethod) queryNode + .getTag(MultiTermRewriteMethodAttribute.TAG_ID); + if (method != null) { + q.setRewriteMethod(method); + } + + return q; + } + +} diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardQueryTreeBuilder.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardQueryTreeBuilder.java index d462aecfaf6..0db0ae2a947 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardQueryTreeBuilder.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/builders/StandardQueryTreeBuilder.java @@ -33,6 +33,7 @@ import org.apache.lucene.queryParser.core.nodes.TokenizedPhraseQueryNode; import org.apache.lucene.queryParser.standard.nodes.MultiPhraseQueryNode; import org.apache.lucene.queryParser.standard.nodes.PrefixWildcardQueryNode; import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode; +import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode; import org.apache.lucene.queryParser.standard.nodes.StandardBooleanQueryNode; import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode; import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline; @@ -63,6 +64,7 @@ public class StandardQueryTreeBuilder extends QueryTreeBuilder implements setBuilder(PrefixWildcardQueryNode.class, new PrefixWildcardQueryNodeBuilder()); setBuilder(RangeQueryNode.class, new RangeQueryNodeBuilder()); + 
setBuilder(RegexpQueryNode.class, new RegexpQueryNodeBuilder()); setBuilder(SlopQueryNode.class, new SlopQueryNodeBuilder()); setBuilder(StandardBooleanQueryNode.class, new StandardBooleanQueryNodeBuilder()); diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RegexpQueryNode.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RegexpQueryNode.java new file mode 100644 index 00000000000..7e4f9896a12 --- /dev/null +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/RegexpQueryNode.java @@ -0,0 +1,92 @@ +package org.apache.lucene.queryParser.standard.nodes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.queryParser.core.nodes.FieldableNode; +import org.apache.lucene.queryParser.core.nodes.QueryNodeImpl; +import org.apache.lucene.queryParser.core.nodes.TextableQueryNode; +import org.apache.lucene.queryParser.core.parser.EscapeQuerySyntax; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.util.BytesRef; + +/** + * A {@link RegexpQueryNode} represents a {@link RegexpQuery}. Example: /[a-z]|[0-9]/ + */ +public class RegexpQueryNode extends QueryNodeImpl implements TextableQueryNode, +FieldableNode { + private static final long serialVersionUID = 0L; + private CharSequence text; + private CharSequence field; + /** + * @param field + * - field name + * @param text + * - value that contains a regular expression + * @param begin + * - start index (inclusive) of the expression within text + * @param end + * - end index (exclusive) of the expression within text + */ + public RegexpQueryNode(CharSequence field, CharSequence text, int begin, + int end) { + this.field = field; + this.text = text.subSequence(begin, end); + } + + public BytesRef textToBytesRef() { + return new BytesRef(text); + } + + @Override + public String toString() { + return ""; + } + + @Override + public RegexpQueryNode cloneTree() throws CloneNotSupportedException { + RegexpQueryNode clone = (RegexpQueryNode) super.cloneTree(); + clone.field = this.field; + clone.text = this.text; + return clone; + } + + public CharSequence getText() { + return text; + } + + public void setText(CharSequence text) { + this.text = text; + } + + public CharSequence getField() { + return field; + } + + public String getFieldAsString() { + return field.toString(); + } + + public void setField(CharSequence field) { + this.field = field; + } + + public CharSequence toQueryString(EscapeQuerySyntax escapeSyntaxParser) { + return isDefaultField(field)? 
"/"+text+"/": field + ":/" + text + "/"; + } + +} diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java index 6c0bab932b2..9ce9050bfec 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/JavaCharStream.java @@ -613,4 +613,4 @@ public class JavaCharStream } } -/* JavaCC - OriginalChecksum=f19c73b8f7faf94cc4a581e7b2933cc6 (do not edit this line) */ +/* JavaCC - OriginalChecksum=31519f95b41182c6740c2afd8dfbf344 (do not edit this line) */ diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java index eee1116dccf..5336b828635 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/ParseException.java @@ -193,4 +193,4 @@ public class ParseException extends QueryNodeParseException { } } -/* JavaCC - OriginalChecksum=38bce846fe6c8482993969f741c0323e (do not edit this line) */ +/* JavaCC - OriginalChecksum=d0caeac083e9874065f9d1e298b5ccd9 (do not edit this line) */ diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java index 20f55a32bc1..78b913966e4 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.java @@ -40,6 
+40,7 @@ import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode; import org.apache.lucene.queryParser.core.nodes.OrQueryNode; import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode; import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode; +import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode; import org.apache.lucene.queryParser.core.nodes.SlopQueryNode; import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode; import org.apache.lucene.queryParser.core.nodes.QueryNode; @@ -178,6 +179,7 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC case LPAREN: case QUOTED: case TERM: + case REGEXPTERM: case RANGEIN_START: case RANGEEX_START: case NUMBER: @@ -326,6 +328,7 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case QUOTED: case TERM: + case REGEXPTERM: case RANGEIN_START: case RANGEEX_START: case NUMBER: @@ -373,17 +376,23 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC final public QueryNode Term(CharSequence field) throws ParseException { Token term, boost=null, fuzzySlop=null, goop1, goop2; boolean fuzzy = false; + boolean regexp = false; QueryNode q =null; ParametricQueryNode qLower, qUpper; float defaultMinSimilarity = 0.5f; switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case TERM: + case REGEXPTERM: case NUMBER: switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case TERM: term = jj_consume_token(TERM); q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); break; + case REGEXPTERM: + term = jj_consume_token(REGEXPTERM); + regexp=true; + break; case NUMBER: term = jj_consume_token(NUMBER); break; @@ -428,6 +437,8 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC {if (true) throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));} } q = new 
FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn); + } else if (regexp) { + q = new RegexpQueryNode(field, term.image, term.beginColumn, term.endColumn-1); } break; case RANGEIN_START: @@ -630,7 +641,7 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC jj_la1_init_0(); } private static void jj_la1_init_0() { - jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x763c00,0x200,0x100,0x10000,0x762000,0x440000,0x80000,0x80000,0x10000,0x6000000,0x800000,0x6000000,0x10000,0x60000000,0x8000000,0x60000000,0x10000,0x80000,0x10000,0x760000,}; + jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0xf63c00,0x200,0x100,0x10000,0xf62000,0x940000,0x80000,0x80000,0x10000,0xc000000,0x1000000,0xc000000,0x10000,0xc0000000,0x10000000,0xc0000000,0x10000,0x80000,0x10000,0xf60000,}; } final private JJCalls[] jj_2_rtns = new JJCalls[1]; private boolean jj_rescan = false; @@ -816,7 +827,7 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC /** Generate ParseException. 
*/ public ParseException generateParseException() { jj_expentries.clear(); - boolean[] la1tokens = new boolean[31]; + boolean[] la1tokens = new boolean[32]; if (jj_kind >= 0) { la1tokens[jj_kind] = true; jj_kind = -1; @@ -830,7 +841,7 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC } } } - for (int i = 0; i < 31; i++) { + for (int i = 0; i < 32; i++) { if (la1tokens[i]) { jj_expentry = new int[1]; jj_expentry[0] = i; diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj index 47129fa929e..de0b364f167 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParser.jj @@ -52,6 +52,7 @@ import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode; import org.apache.lucene.queryParser.core.nodes.OrQueryNode; import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode; import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode; +import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode; import org.apache.lucene.queryParser.core.nodes.SlopQueryNode; import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode; import org.apache.lucene.queryParser.core.nodes.QueryNode; @@ -132,6 +133,7 @@ PARSER_END(StandardSyntaxParser) | )* "\""> | (<_TERM_CHAR>)* > | )+ ( "." (<_NUM_CHAR>)+ )? )? 
> +| | : RangeIn | : RangeEx } @@ -374,6 +376,7 @@ QueryNode Clause(CharSequence field) : { QueryNode Term(CharSequence field) : { Token term, boost=null, fuzzySlop=null, goop1, goop2; boolean fuzzy = false; + boolean regexp = false; QueryNode q =null; ParametricQueryNode qLower, qUpper; float defaultMinSimilarity = 0.5f; @@ -382,6 +385,7 @@ QueryNode Term(CharSequence field) : { ( ( term= { q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); } + | term= { regexp=true; } | term= ) [ fuzzySlop= { fuzzy=true; } ] @@ -396,6 +400,8 @@ QueryNode Term(CharSequence field) : { throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS)); } q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn); + } else if (regexp) { + q = new RegexpQueryNode(field, term.image, term.beginColumn, term.endColumn-1); } } | ( ( goop1=|goop1= ) diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserConstants.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserConstants.java index 147a2d88ffc..bd4a5d15621 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserConstants.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserConstants.java @@ -47,27 +47,29 @@ public interface StandardSyntaxParserConstants { /** RegularExpression Id. */ int FUZZY_SLOP = 19; /** RegularExpression Id. */ - int RANGEIN_START = 20; + int REGEXPTERM = 20; /** RegularExpression Id. */ - int RANGEEX_START = 21; + int RANGEIN_START = 21; /** RegularExpression Id. */ - int NUMBER = 22; + int RANGEEX_START = 22; /** RegularExpression Id. */ - int RANGEIN_TO = 23; + int NUMBER = 23; /** RegularExpression Id. 
*/ - int RANGEIN_END = 24; + int RANGEIN_TO = 24; /** RegularExpression Id. */ - int RANGEIN_QUOTED = 25; + int RANGEIN_END = 25; /** RegularExpression Id. */ - int RANGEIN_GOOP = 26; + int RANGEIN_QUOTED = 26; /** RegularExpression Id. */ - int RANGEEX_TO = 27; + int RANGEIN_GOOP = 27; /** RegularExpression Id. */ - int RANGEEX_END = 28; + int RANGEEX_TO = 28; /** RegularExpression Id. */ - int RANGEEX_QUOTED = 29; + int RANGEEX_END = 29; /** RegularExpression Id. */ - int RANGEEX_GOOP = 30; + int RANGEEX_QUOTED = 30; + /** RegularExpression Id. */ + int RANGEEX_GOOP = 31; /** Lexical state. */ int Boost = 0; @@ -100,6 +102,7 @@ public interface StandardSyntaxParserConstants { "", "", "", + "", "\"[\"", "\"{\"", "", diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserTokenManager.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserTokenManager.java index d7cb3cc9c36..66cfbdd1807 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserTokenManager.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/StandardSyntaxParserTokenManager.java @@ -38,6 +38,7 @@ import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode; import org.apache.lucene.queryParser.core.nodes.OrQueryNode; import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode; import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode; +import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode; import org.apache.lucene.queryParser.core.nodes.SlopQueryNode; import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode; import org.apache.lucene.queryParser.core.nodes.QueryNode; @@ -86,11 +87,11 @@ private int jjMoveStringLiteralDfa0_3() case 58: return jjStopAtPos(0, 15); case 91: - return jjStopAtPos(0, 20); + return jjStopAtPos(0, 21); case 94: return 
jjStopAtPos(0, 16); case 123: - return jjStopAtPos(0, 21); + return jjStopAtPos(0, 22); default : return jjMoveNfa_3(0, 0); } @@ -110,7 +111,7 @@ static final long[] jjbitVec4 = { private int jjMoveNfa_3(int startState, int curPos) { int startsAt = 0; - jjnewStateCnt = 28; + jjnewStateCnt = 33; int i = 1; jjstateSet[0] = startState; int kind = 0x7fffffff; @@ -144,7 +145,9 @@ private int jjMoveNfa_3(int startState, int curPos) if (kind > 10) kind = 10; } - if (curChar == 38) + if (curChar == 47) + jjCheckNAddStates(3, 5); + else if (curChar == 38) jjstateSet[jjnewStateCnt++] = 4; break; case 4: @@ -198,7 +201,7 @@ private int jjMoveNfa_3(int startState, int curPos) break; if (kind > 19) kind = 19; - jjAddStates(3, 4); + jjAddStates(6, 7); break; case 26: if (curChar == 46) @@ -211,6 +214,19 @@ private int jjMoveNfa_3(int startState, int curPos) kind = 19; jjCheckNAdd(27); break; + case 28: + case 30: + if (curChar == 47) + jjCheckNAddStates(3, 5); + break; + case 29: + if ((0xffff7fffffffffffL & l) != 0L) + jjCheckNAddStates(3, 5); + break; + case 32: + if (curChar == 47 && kind > 20) + kind = 20; + break; default : break; } } while(i != startsAt); @@ -325,6 +341,13 @@ private int jjMoveNfa_3(int startState, int curPos) kind = 19; jjstateSet[jjnewStateCnt++] = 25; break; + case 29: + jjAddStates(3, 5); + break; + case 31: + if (curChar == 92) + jjstateSet[jjnewStateCnt++] = 30; + break; default : break; } } while(i != startsAt); @@ -373,6 +396,10 @@ private int jjMoveNfa_3(int startState, int curPos) kind = 18; jjCheckNAddTwoStates(20, 21); break; + case 29: + if (jjCanMove_1(hiByte, i1, i2, l1, l2)) + jjAddStates(3, 5); + break; default : break; } } while(i != startsAt); @@ -384,7 +411,7 @@ private int jjMoveNfa_3(int startState, int curPos) kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 28 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = 
input_stream.readChar(); } catch(java.io.IOException e) { return curPos; } @@ -395,9 +422,9 @@ private final int jjStopStringLiteralDfa_1(int pos, long active0) switch (pos) { case 0: - if ((active0 & 0x8000000L) != 0L) + if ((active0 & 0x10000000L) != 0L) { - jjmatchedKind = 30; + jjmatchedKind = 31; return 6; } return -1; @@ -414,9 +441,9 @@ private int jjMoveStringLiteralDfa0_1() switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_1(0x8000000L); + return jjMoveStringLiteralDfa1_1(0x10000000L); case 125: - return jjStopAtPos(0, 28); + return jjStopAtPos(0, 29); default : return jjMoveNfa_1(0, 0); } @@ -431,8 +458,8 @@ private int jjMoveStringLiteralDfa1_1(long active0) switch(curChar) { case 79: - if ((active0 & 0x8000000L) != 0L) - return jjStartNfaWithStates_1(1, 27, 6); + if ((active0 & 0x10000000L) != 0L) + return jjStartNfaWithStates_1(1, 28, 6); break; default : break; @@ -468,8 +495,8 @@ private int jjMoveNfa_1(int startState, int curPos) case 0: if ((0xfffffffeffffffffL & l) != 0L) { - if (kind > 30) - kind = 30; + if (kind > 31) + kind = 31; jjCheckNAdd(6); } if ((0x100002600L & l) != 0L) @@ -486,21 +513,21 @@ private int jjMoveNfa_1(int startState, int curPos) break; case 2: if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddStates(5, 7); + jjCheckNAddStates(8, 10); break; case 3: if (curChar == 34) - jjCheckNAddStates(5, 7); + jjCheckNAddStates(8, 10); break; case 5: - if (curChar == 34 && kind > 29) - kind = 29; + if (curChar == 34 && kind > 30) + kind = 30; break; case 6: if ((0xfffffffeffffffffL & l) == 0L) break; - if (kind > 30) - kind = 30; + if (kind > 31) + kind = 31; jjCheckNAdd(6); break; default : break; @@ -518,12 +545,12 @@ private int jjMoveNfa_1(int startState, int curPos) case 6: if ((0xdfffffffffffffffL & l) == 0L) break; - if (kind > 30) - kind = 30; + if (kind > 31) + kind = 31; jjCheckNAdd(6); break; case 2: - jjAddStates(5, 7); + jjAddStates(8, 10); break; case 4: if (curChar == 92) @@ -552,20 +579,20 @@ private int 
jjMoveNfa_1(int startState, int curPos) } if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { - if (kind > 30) - kind = 30; + if (kind > 31) + kind = 31; jjCheckNAdd(6); } break; case 2: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjAddStates(5, 7); + jjAddStates(8, 10); break; case 6: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; - if (kind > 30) - kind = 30; + if (kind > 31) + kind = 31; jjCheckNAdd(6); break; default : break; @@ -610,9 +637,9 @@ private int jjMoveNfa_0(int startState, int curPos) case 0: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 22) - kind = 22; - jjAddStates(8, 9); + if (kind > 23) + kind = 23; + jjAddStates(11, 12); break; case 1: if (curChar == 46) @@ -621,8 +648,8 @@ private int jjMoveNfa_0(int startState, int curPos) case 2: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 22) - kind = 22; + if (kind > 23) + kind = 23; jjCheckNAdd(2); break; default : break; @@ -673,9 +700,9 @@ private final int jjStopStringLiteralDfa_2(int pos, long active0) switch (pos) { case 0: - if ((active0 & 0x800000L) != 0L) + if ((active0 & 0x1000000L) != 0L) { - jjmatchedKind = 26; + jjmatchedKind = 27; return 6; } return -1; @@ -692,9 +719,9 @@ private int jjMoveStringLiteralDfa0_2() switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_2(0x800000L); + return jjMoveStringLiteralDfa1_2(0x1000000L); case 93: - return jjStopAtPos(0, 24); + return jjStopAtPos(0, 25); default : return jjMoveNfa_2(0, 0); } @@ -709,8 +736,8 @@ private int jjMoveStringLiteralDfa1_2(long active0) switch(curChar) { case 79: - if ((active0 & 0x800000L) != 0L) - return jjStartNfaWithStates_2(1, 23, 6); + if ((active0 & 0x1000000L) != 0L) + return jjStartNfaWithStates_2(1, 24, 6); break; default : break; @@ -746,8 +773,8 @@ private int jjMoveNfa_2(int startState, int curPos) case 0: if ((0xfffffffeffffffffL & l) != 0L) { - if (kind > 26) - kind = 26; + if (kind > 27) + kind = 27; jjCheckNAdd(6); } if ((0x100002600L & l) != 0L) @@ -764,21 +791,21 @@ private int 
jjMoveNfa_2(int startState, int curPos) break; case 2: if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddStates(5, 7); + jjCheckNAddStates(8, 10); break; case 3: if (curChar == 34) - jjCheckNAddStates(5, 7); + jjCheckNAddStates(8, 10); break; case 5: - if (curChar == 34 && kind > 25) - kind = 25; + if (curChar == 34 && kind > 26) + kind = 26; break; case 6: if ((0xfffffffeffffffffL & l) == 0L) break; - if (kind > 26) - kind = 26; + if (kind > 27) + kind = 27; jjCheckNAdd(6); break; default : break; @@ -796,12 +823,12 @@ private int jjMoveNfa_2(int startState, int curPos) case 6: if ((0xffffffffdfffffffL & l) == 0L) break; - if (kind > 26) - kind = 26; + if (kind > 27) + kind = 27; jjCheckNAdd(6); break; case 2: - jjAddStates(5, 7); + jjAddStates(8, 10); break; case 4: if (curChar == 92) @@ -830,20 +857,20 @@ private int jjMoveNfa_2(int startState, int curPos) } if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { - if (kind > 26) - kind = 26; + if (kind > 27) + kind = 27; jjCheckNAdd(6); } break; case 2: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjAddStates(5, 7); + jjAddStates(8, 10); break; case 6: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; - if (kind > 26) - kind = 26; + if (kind > 27) + kind = 27; jjCheckNAdd(6); break; default : break; @@ -864,7 +891,7 @@ private int jjMoveNfa_2(int startState, int curPos) } } static final int[] jjnextStates = { - 15, 16, 18, 25, 26, 2, 4, 5, 0, 1, + 15, 16, 18, 29, 31, 32, 25, 26, 2, 4, 5, 0, 1, }; private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) { @@ -906,8 +933,8 @@ private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, lo /** Token literal values. 
*/ public static final String[] jjstrLiteralImages = { "", null, null, null, null, null, null, null, null, null, null, "\53", "\55", -"\50", "\51", "\72", "\136", null, null, null, "\133", "\173", null, "\124\117", -"\135", null, null, "\124\117", "\175", null, null, }; +"\50", "\51", "\72", "\136", null, null, null, null, "\133", "\173", null, +"\124\117", "\135", null, null, "\124\117", "\175", null, null, }; /** Lexer state names. */ public static final String[] lexStateNames = { @@ -919,18 +946,18 @@ public static final String[] lexStateNames = { /** Lex State array. */ public static final int[] jjnewLexState = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, 2, 1, 3, -1, 3, - -1, -1, -1, 3, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, 2, 1, 3, -1, + 3, -1, -1, -1, 3, -1, -1, }; static final long[] jjtoToken = { - 0x7fffff01L, + 0xffffff01L, }; static final long[] jjtoSkip = { 0x80L, }; protected JavaCharStream input_stream; -private final int[] jjrounds = new int[28]; -private final int[] jjstateSet = new int[56]; +private final int[] jjrounds = new int[33]; +private final int[] jjstateSet = new int[66]; protected char curChar; /** Constructor. 
*/ public StandardSyntaxParserTokenManager(JavaCharStream stream){ @@ -957,7 +984,7 @@ private void ReInitRounds() { int i; jjround = 0x80000001; - for (i = 28; i-- > 0;) + for (i = 33; i-- > 0;) jjrounds[i] = 0x80000000; } diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java index cb0e250a0eb..da97d86decc 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/Token.java @@ -121,4 +121,4 @@ public class Token { } } -/* JavaCC - OriginalChecksum=0aac6816ecd328eda2f38b9d09739ab6 (do not edit this line) */ +/* JavaCC - OriginalChecksum=cecb6022e0f2e2fca751015375f6d319 (do not edit this line) */ diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java index bfe8feea01c..06d602ecad5 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser/TokenMgrError.java @@ -138,4 +138,4 @@ public class TokenMgrError extends Error this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); } } -/* JavaCC - OriginalChecksum=a75b5b61664a73631a032a6e44f4b38a (do not edit this line) */ +/* JavaCC - OriginalChecksum=0e9c5fad06efef4f41f97b851ac7b0ce (do not edit this line) */ diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java index 
843b31ac648..14f5f5d32ea 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java @@ -25,9 +25,11 @@ import org.apache.lucene.queryParser.core.nodes.FieldQueryNode; import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode; import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode; import org.apache.lucene.queryParser.core.nodes.QueryNode; +import org.apache.lucene.queryParser.core.nodes.TextableQueryNode; import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl; import org.apache.lucene.queryParser.core.util.UnescapedCharSequence; import org.apache.lucene.queryParser.standard.config.LowercaseExpandedTermsAttribute; +import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode; import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode; /** @@ -70,10 +72,10 @@ public class LowercaseExpandedTermsQueryNodeProcessor extends protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException { if (node instanceof WildcardQueryNode || node instanceof FuzzyQueryNode - || node instanceof ParametricQueryNode) { + || node instanceof ParametricQueryNode || node instanceof RegexpQueryNode) { - FieldQueryNode fieldNode = (FieldQueryNode) node; - fieldNode.setText(UnescapedCharSequence.toLowerCase(fieldNode.getText())); + TextableQueryNode txtNode = (TextableQueryNode) node; + txtNode.setText(UnescapedCharSequence.toLowerCase(txtNode.getText())); } return node; diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/MultiTermRewriteMethodProcessor.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/MultiTermRewriteMethodProcessor.java index 2e3c5989d7c..61ea306f7ab 100644 --- 
a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/MultiTermRewriteMethodProcessor.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/MultiTermRewriteMethodProcessor.java @@ -23,6 +23,7 @@ import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode; import org.apache.lucene.queryParser.core.nodes.QueryNode; import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl; import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute; +import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode; import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode; import org.apache.lucene.search.MultiTermQuery; @@ -40,7 +41,7 @@ public class MultiTermRewriteMethodProcessor extends QueryNodeProcessorImpl { // set setMultiTermRewriteMethod for WildcardQueryNode and // PrefixWildcardQueryNode if (node instanceof WildcardQueryNode - || node instanceof ParametricRangeQueryNode) { + || node instanceof ParametricRangeQueryNode || node instanceof RegexpQueryNode) { if (!getQueryConfigHandler().hasAttribute( MultiTermRewriteMethodAttribute.class)) { diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java index 9336eff5dba..2d3b3beff75 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java @@ -28,15 +28,19 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.document.DateTools; import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.QueryParser; import 
org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.util.LocalizedTestCase; import org.apache.lucene.util.automaton.BasicAutomata; import org.apache.lucene.util.automaton.CharacterRunAutomaton; @@ -690,6 +694,35 @@ public class TestPrecedenceQueryParser extends LocalizedTestCase { query2 = parser.parse("A (-B +C)"); assertEquals(query1, query2); } + + public void testRegexps() throws Exception { + PrecedenceQueryParser qp = getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]")); + assertEquals(q, qp.parse("/[a-z][123]/")); + qp.setLowercaseExpandedTerms(true); + assertEquals(q, qp.parse("/[A-Z][123]/")); + q.setBoost(0.5f); + assertEquals(q, qp.parse("/[A-Z][123]/^0.5")); + qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + assertTrue(qp.parse("/[A-Z][123]/^0.5") instanceof RegexpQuery); + assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)qp.parse("/[A-Z][123]/^0.5")).getRewriteMethod()); + assertEquals(q, qp.parse("/[A-Z][123]/^0.5")); + qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); + + Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]")); + assertEquals(escaped, qp.parse("/[a-z]\\/[123]/")); + Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]")); + assertEquals(escaped2, qp.parse("/[a-z]\\*[123]/")); 
+ + BooleanQuery complex = new BooleanQuery(); + BooleanQuery inner = new BooleanQuery(); + inner.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST); + inner.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST); + complex.add(inner, Occur.SHOULD); + complex.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD); + assertEquals(complex, qp.parse("/[a-z]\\/[123]/ AND path:/etc/init.d/ OR /etc\\/init\\[.\\]d/lucene/ ")); + } @Override diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java index 4f19708f509..ada321a2011 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java @@ -68,10 +68,12 @@ import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LocalizedTestCase; import org.apache.lucene.util.automaton.BasicAutomata; @@ -1155,6 +1157,34 @@ public class TestQPHelper extends LocalizedTestCase { // assertEquals(1,type[0]); } + + public void testRegexps() throws Exception { + StandardQueryParser qp = new StandardQueryParser(); + final String df = "field" ; + RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]")); + assertEquals(q, qp.parse("/[a-z][123]/", df)); + qp.setLowercaseExpandedTerms(true); + assertEquals(q, qp.parse("/[A-Z][123]/", df)); + 
q.setBoost(0.5f); + assertEquals(q, qp.parse("/[A-Z][123]/^0.5", df)); + qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + assertTrue(qp.parse("/[A-Z][123]/^0.5", df) instanceof RegexpQuery); + assertEquals(q, qp.parse("/[A-Z][123]/^0.5", df)); + assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)qp.parse("/[A-Z][123]/^0.5", df)).getRewriteMethod()); + qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); + + Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]")); + assertEquals(escaped, qp.parse("/[a-z]\\/[123]/", df)); + Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]")); + assertEquals(escaped2, qp.parse("/[a-z]\\*[123]/", df)); + + BooleanQuery complex = new BooleanQuery(); + complex.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST); + complex.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST); + complex.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD); + assertEquals(complex, qp.parse("/[a-z]\\/[123]/ AND path:/etc/init.d/ OR /etc\\/init\\[.\\]d/lucene/ ", df)); + } public void testStopwords() throws Exception { StandardQueryParser qp = new StandardQueryParser(); diff --git a/lucene/src/java/org/apache/lucene/queryParser/CharStream.java b/lucene/src/java/org/apache/lucene/queryParser/CharStream.java index 4423996dadf..9e546d50a41 100644 --- a/lucene/src/java/org/apache/lucene/queryParser/CharStream.java +++ b/lucene/src/java/org/apache/lucene/queryParser/CharStream.java @@ -109,4 +109,4 @@ public interface CharStream { void Done(); } -/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */ +/* JavaCC - OriginalChecksum=a83909a2403f969f94d18375f9f143e4 (do not edit this line) */ diff --git a/lucene/src/java/org/apache/lucene/queryParser/ParseException.java 
b/lucene/src/java/org/apache/lucene/queryParser/ParseException.java index b48a44644d8..fdb47847a38 100644 --- a/lucene/src/java/org/apache/lucene/queryParser/ParseException.java +++ b/lucene/src/java/org/apache/lucene/queryParser/ParseException.java @@ -195,4 +195,4 @@ public class ParseException extends Exception { } } -/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */ +/* JavaCC - OriginalChecksum=c63b396885c4ff44d7aa48d3feae60cd (do not edit this line) */ diff --git a/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java b/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java index 89e7bcfd69b..ef824b3f13c 100644 --- a/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java +++ b/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java @@ -29,6 +29,7 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermQuery; @@ -861,6 +862,17 @@ public class QueryParser implements QueryParserConstants { return query; } + /** + * Builds a new RegexpQuery instance + * @param regexp Regexp term + * @return new RegexpQuery instance + */ + protected Query newRegexpQuery(Term regexp) { + RegexpQuery query = new RegexpQuery(regexp); + query.setRewriteMethod(multiTermRewriteMethod); + return query; + } + /** * Builds a new FuzzyQuery instance * @param term Term @@ -985,6 +997,35 @@ public class QueryParser implements QueryParserConstants { return newWildcardQuery(t); } + /** + * Factory method for generating a query. Called when parser + * parses an input term token that contains a regular expression + * query. + *

+ * Depending on settings, pattern term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with regular expression templates. + *

+ * Can be overridden by extending classes, to provide custom handling for + * regular expression queries, which may be necessary due to missing analyzer + * calls. + * + * @param field Name of the field query will use. + * @param termStr Term token that contains a regular expression + * + * @return Resulting {@link Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getRegexpQuery(String field, String termStr) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return newRegexpQuery(t); + } + /** * Factory method for generating a query (similar to * {@link #getWildcardQuery}). Called when parser parses an input term @@ -1234,6 +1275,7 @@ public class QueryParser implements QueryParserConstants { case TERM: case PREFIXTERM: case WILDTERM: + case REGEXPTERM: case RANGEIN_START: case RANGEEX_START: case NUMBER: @@ -1285,6 +1327,7 @@ public class QueryParser implements QueryParserConstants { case TERM: case PREFIXTERM: case WILDTERM: + case REGEXPTERM: case RANGEIN_START: case RANGEEX_START: case NUMBER: @@ -1325,12 +1368,14 @@ public class QueryParser implements QueryParserConstants { boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; + boolean regexp = false; Query q; switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case STAR: case TERM: case PREFIXTERM: case WILDTERM: + case REGEXPTERM: case NUMBER: switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case TERM: @@ -1348,6 +1393,10 @@ public class QueryParser implements QueryParserConstants { term = jj_consume_token(WILDTERM); wildcard=true; break; + case REGEXPTERM: + term = jj_consume_token(REGEXPTERM); + regexp=true; + break; case NUMBER: term = jj_consume_token(NUMBER); break; @@ -1390,6 +1439,8 @@ public class QueryParser implements QueryParserConstants { q = getPrefixQuery(field, discardEscapeChar(term.image.substring (0, term.image.length()-1))); + } else if 
(regexp) { + q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1)); } else if (fuzzy) { float fms = fuzzyMinSim; try { @@ -1569,6 +1620,12 @@ public class QueryParser implements QueryParserConstants { finally { jj_save(0, xla); } } + private boolean jj_3R_2() { + if (jj_scan_token(TERM)) return true; + if (jj_scan_token(COLON)) return true; + return false; + } + private boolean jj_3_1() { Token xsp; xsp = jj_scanpos; @@ -1585,12 +1642,6 @@ public class QueryParser implements QueryParserConstants { return false; } - private boolean jj_3R_2() { - if (jj_scan_token(TERM)) return true; - if (jj_scan_token(COLON)) return true; - return false; - } - /** Generated Token Manager. */ public QueryParserTokenManager token_source; /** Current token. */ @@ -1609,10 +1660,10 @@ public class QueryParser implements QueryParserConstants { jj_la1_init_1(); } private static void jj_la1_init_0() { - jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x3ed3f00,0x90000,0x20000,0x3ed2000,0x2690000,0x100000,0x100000,0x20000,0x30000000,0x4000000,0x30000000,0x20000,0x0,0x40000000,0x0,0x20000,0x100000,0x20000,0x3ed0000,}; + jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x7ed3f00,0x90000,0x20000,0x7ed2000,0x4e90000,0x100000,0x100000,0x20000,0x60000000,0x8000000,0x60000000,0x20000,0x0,0x80000000,0x0,0x20000,0x100000,0x20000,0x7ed0000,}; } private static void jj_la1_init_1() { - jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3,0x0,0x3,0x0,0x0,0x0,0x0,}; + jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x6,0x0,0x6,0x0,0x0,0x0,0x0,}; } final private JJCalls[] jj_2_rtns = new JJCalls[1]; private boolean jj_rescan = false; @@ -1766,7 +1817,7 @@ public class QueryParser implements QueryParserConstants { /** Generate ParseException. 
*/ public ParseException generateParseException() { jj_expentries.clear(); - boolean[] la1tokens = new boolean[34]; + boolean[] la1tokens = new boolean[35]; if (jj_kind >= 0) { la1tokens[jj_kind] = true; jj_kind = -1; @@ -1783,7 +1834,7 @@ public class QueryParser implements QueryParserConstants { } } } - for (int i = 0; i < 34; i++) { + for (int i = 0; i < 35; i++) { if (la1tokens[i]) { jj_expentry = new int[1]; jj_expentry[0] = i; diff --git a/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj b/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj index 5e8a2074d6c..36f416ac926 100644 --- a/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj +++ b/lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj @@ -53,6 +53,7 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermQuery; @@ -885,6 +886,17 @@ public class QueryParser { return query; } + /** + * Builds a new RegexpQuery instance + * @param regexp Regexp term + * @return new RegexpQuery instance + */ + protected Query newRegexpQuery(Term regexp) { + RegexpQuery query = new RegexpQuery(regexp); + query.setRewriteMethod(multiTermRewriteMethod); + return query; + } + /** * Builds a new FuzzyQuery instance * @param term Term @@ -1009,6 +1021,35 @@ public class QueryParser { return newWildcardQuery(t); } + /** + * Factory method for generating a query. Called when parser + * parses an input term token that contains a regular expression + * query. + *

+ * Depending on settings, pattern term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with regular expression templates. + *

+ * Can be overridden by extending classes, to provide custom handling for + * regular expression queries, which may be necessary due to missing analyzer + * calls. + * + * @param field Name of the field query will use. + * @param termStr Term token that contains a regular expression + * + * @return Resulting {@link Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getRegexpQuery(String field, String termStr) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return newRegexpQuery(t); + } + /** * Factory method for generating a query (similar to * {@link #getWildcardQuery}). Called when parser parses an input term @@ -1218,6 +1259,7 @@ PARSER_END(QueryParser) | <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? > | <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) > | <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > +| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" > | <RANGEIN_START: "[" > : RangeIn | <RANGEEX_START: "{" > : RangeEx } @@ -1340,6 +1382,7 @@ Query Term(String field) : { boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; + boolean regexp = false; Query q; } { @@ -1349,6 +1392,7 @@ Query Term(String field) : { | term=<STAR> { wildcard=true; } | term=<PREFIXTERM> { prefix=true; } | term=<WILDTERM> { wildcard=true; } + | term=<REGEXPTERM> { regexp=true; } | term=<NUMBER> ) [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] @@ -1361,6 +1405,8 @@ Query Term(String field) : { q = getPrefixQuery(field, discardEscapeChar(term.image.substring (0, term.image.length()-1))); + } else if (regexp) { + q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1)); } else if (fuzzy) { float fms = fuzzyMinSim; try { diff --git a/lucene/src/java/org/apache/lucene/queryParser/QueryParserConstants.java b/lucene/src/java/org/apache/lucene/queryParser/QueryParserConstants.java index 0073663cb42..5bf9a46c0a0 100644 --- a/lucene/src/java/org/apache/lucene/queryParser/QueryParserConstants.java +++ b/lucene/src/java/org/apache/lucene/queryParser/QueryParserConstants.java @@ -53,27 +53,29 @@
public interface QueryParserConstants { /** RegularExpression Id. */ int WILDTERM = 22; /** RegularExpression Id. */ - int RANGEIN_START = 23; + int REGEXPTERM = 23; /** RegularExpression Id. */ - int RANGEEX_START = 24; + int RANGEIN_START = 24; /** RegularExpression Id. */ - int NUMBER = 25; + int RANGEEX_START = 25; /** RegularExpression Id. */ - int RANGEIN_TO = 26; + int NUMBER = 26; /** RegularExpression Id. */ - int RANGEIN_END = 27; + int RANGEIN_TO = 27; /** RegularExpression Id. */ - int RANGEIN_QUOTED = 28; + int RANGEIN_END = 28; /** RegularExpression Id. */ - int RANGEIN_GOOP = 29; + int RANGEIN_QUOTED = 29; /** RegularExpression Id. */ - int RANGEEX_TO = 30; + int RANGEIN_GOOP = 30; /** RegularExpression Id. */ - int RANGEEX_END = 31; + int RANGEEX_TO = 31; /** RegularExpression Id. */ - int RANGEEX_QUOTED = 32; + int RANGEEX_END = 32; /** RegularExpression Id. */ - int RANGEEX_GOOP = 33; + int RANGEEX_QUOTED = 33; + /** RegularExpression Id. */ + int RANGEEX_GOOP = 34; /** Lexical state. 
*/ int Boost = 0; @@ -109,6 +111,7 @@ public interface QueryParserConstants { "", "", "", + "", "\"[\"", "\"{\"", "", diff --git a/lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java b/lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java index 1aec8bb370f..2e1a6d481dc 100644 --- a/lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java +++ b/lucene/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java @@ -27,6 +27,7 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermQuery; @@ -70,7 +71,7 @@ private int jjMoveStringLiteralDfa0_3() case 41: return jjStopAtPos(0, 14); case 42: - return jjStartNfaWithStates_3(0, 16, 36); + return jjStartNfaWithStates_3(0, 16, 41); case 43: return jjStopAtPos(0, 11); case 45: @@ -78,11 +79,11 @@ private int jjMoveStringLiteralDfa0_3() case 58: return jjStopAtPos(0, 15); case 91: - return jjStopAtPos(0, 23); + return jjStopAtPos(0, 24); case 94: return jjStopAtPos(0, 17); case 123: - return jjStopAtPos(0, 24); + return jjStopAtPos(0, 25); default : return jjMoveNfa_3(0, 0); } @@ -110,7 +111,7 @@ static final long[] jjbitVec4 = { private int jjMoveNfa_3(int startState, int curPos) { int startsAt = 0; - jjnewStateCnt = 36; + jjnewStateCnt = 41; int i = 1; jjstateSet[0] = startState; int kind = 0x7fffffff; @@ -125,14 +126,6 @@ private int jjMoveNfa_3(int startState, int curPos) { switch(jjstateSet[--i]) { - case 36: - case 25: - if ((0xfbfffcf8ffffd9ffL & l) == 0L) - break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - break; case 0: if ((0xfbffd4f8ffffd9ffL & l) != 0L) { @@ -163,9 +156,19 @@ private int jjMoveNfa_3(int startState, int 
curPos) if (kind > 21) kind = 21; } - if (curChar == 38) + if (curChar == 47) + jjCheckNAddStates(8, 10); + else if (curChar == 38) jjstateSet[jjnewStateCnt++] = 4; break; + case 41: + case 25: + if ((0xfbfffcf8ffffd9ffL & l) == 0L) + break; + if (kind > 22) + kind = 22; + jjCheckNAddTwoStates(25, 26); + break; case 4: if (curChar == 38 && kind > 8) kind = 8; @@ -198,7 +201,7 @@ private int jjMoveNfa_3(int startState, int curPos) break; if (kind > 20) kind = 20; - jjAddStates(8, 9); + jjAddStates(11, 12); break; case 21: if (curChar == 46) @@ -228,30 +231,43 @@ private int jjMoveNfa_3(int startState, int curPos) jjCheckNAddTwoStates(25, 26); break; case 28: + case 30: + if (curChar == 47) + jjCheckNAddStates(8, 10); + break; + case 29: + if ((0xffff7fffffffffffL & l) != 0L) + jjCheckNAddStates(8, 10); + break; + case 32: + if (curChar == 47 && kind > 23) + kind = 23; + break; + case 33: if ((0x7bffd0f8ffffd9ffL & l) == 0L) break; if (kind > 19) kind = 19; jjCheckNAddStates(3, 7); break; - case 29: + case 34: if ((0x7bfff8f8ffffd9ffL & l) == 0L) break; if (kind > 19) kind = 19; - jjCheckNAddTwoStates(29, 30); + jjCheckNAddTwoStates(34, 35); break; - case 31: + case 36: if (kind > 19) kind = 19; - jjCheckNAddTwoStates(29, 30); + jjCheckNAddTwoStates(34, 35); break; - case 32: + case 37: if ((0x7bfff8f8ffffd9ffL & l) != 0L) - jjCheckNAddStates(10, 12); + jjCheckNAddStates(13, 15); break; - case 34: - jjCheckNAddStates(10, 12); + case 39: + jjCheckNAddStates(13, 15); break; default : break; } @@ -264,16 +280,6 @@ private int jjMoveNfa_3(int startState, int curPos) { switch(jjstateSet[--i]) { - case 36: - if ((0x97ffffff87ffffffL & l) != 0L) - { - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - } - else if (curChar == 92) - jjCheckNAddTwoStates(27, 27); - break; case 0: if ((0x97ffffff87ffffffL & l) != 0L) { @@ -282,7 +288,7 @@ private int jjMoveNfa_3(int startState, int curPos) jjCheckNAddStates(3, 7); } else if (curChar == 92) - jjCheckNAddStates(13, 
15); + jjCheckNAddStates(16, 18); else if (curChar == 126) { if (kind > 20) @@ -304,6 +310,16 @@ private int jjMoveNfa_3(int startState, int curPos) else if (curChar == 65) jjstateSet[jjnewStateCnt++] = 2; break; + case 41: + if ((0x97ffffff87ffffffL & l) != 0L) + { + if (kind > 22) + kind = 22; + jjCheckNAddTwoStates(25, 26); + } + else if (curChar == 92) + jjCheckNAddTwoStates(27, 27); + break; case 1: if (curChar == 68 && kind > 8) kind = 8; @@ -385,44 +401,51 @@ private int jjMoveNfa_3(int startState, int curPos) kind = 22; jjCheckNAddTwoStates(25, 26); break; - case 28: + case 29: + jjAddStates(8, 10); + break; + case 31: + if (curChar == 92) + jjstateSet[jjnewStateCnt++] = 30; + break; + case 33: if ((0x97ffffff87ffffffL & l) == 0L) break; if (kind > 19) kind = 19; jjCheckNAddStates(3, 7); break; - case 29: + case 34: if ((0x97ffffff87ffffffL & l) == 0L) break; if (kind > 19) kind = 19; - jjCheckNAddTwoStates(29, 30); - break; - case 30: - if (curChar == 92) - jjCheckNAddTwoStates(31, 31); - break; - case 31: - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); - break; - case 32: - if ((0x97ffffff87ffffffL & l) != 0L) - jjCheckNAddStates(10, 12); - break; - case 33: - if (curChar == 92) - jjCheckNAddTwoStates(34, 34); - break; - case 34: - jjCheckNAddStates(10, 12); + jjCheckNAddTwoStates(34, 35); break; case 35: if (curChar == 92) + jjCheckNAddTwoStates(36, 36); + break; + case 36: + if (kind > 19) + kind = 19; + jjCheckNAddTwoStates(34, 35); + break; + case 37: + if ((0x97ffffff87ffffffL & l) != 0L) jjCheckNAddStates(13, 15); break; + case 38: + if (curChar == 92) + jjCheckNAddTwoStates(39, 39); + break; + case 39: + jjCheckNAddStates(13, 15); + break; + case 40: + if (curChar == 92) + jjCheckNAddStates(16, 18); + break; default : break; } } while(i != startsAt); @@ -438,14 +461,6 @@ private int jjMoveNfa_3(int startState, int curPos) { switch(jjstateSet[--i]) { - case 36: - case 25: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) - break; - if (kind 
> 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - break; case 0: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { @@ -465,6 +480,14 @@ private int jjMoveNfa_3(int startState, int curPos) jjCheckNAddStates(3, 7); } break; + case 41: + case 25: + if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) + break; + if (kind > 22) + kind = 22; + jjCheckNAddTwoStates(25, 26); + break; case 15: case 17: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) @@ -484,34 +507,38 @@ private int jjMoveNfa_3(int startState, int curPos) kind = 22; jjCheckNAddTwoStates(25, 26); break; - case 28: + case 29: + if (jjCanMove_1(hiByte, i1, i2, l1, l2)) + jjAddStates(8, 10); + break; + case 33: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; if (kind > 19) kind = 19; jjCheckNAddStates(3, 7); break; - case 29: + case 34: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; if (kind > 19) kind = 19; - jjCheckNAddTwoStates(29, 30); + jjCheckNAddTwoStates(34, 35); break; - case 31: + case 36: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; if (kind > 19) kind = 19; - jjCheckNAddTwoStates(29, 30); + jjCheckNAddTwoStates(34, 35); break; - case 32: + case 37: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(10, 12); + jjCheckNAddStates(13, 15); break; - case 34: + case 39: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(10, 12); + jjCheckNAddStates(13, 15); break; default : break; } @@ -524,7 +551,7 @@ private int jjMoveNfa_3(int startState, int curPos) kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 36 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 41 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { return curPos; } @@ -535,9 +562,9 @@ private final int jjStopStringLiteralDfa_1(int pos, long active0) switch (pos) { case 0: - if ((active0 & 0x40000000L) != 0L) + if ((active0 & 0x80000000L) != 0L) { - jjmatchedKind = 33; + jjmatchedKind = 34; return 6; } return -1; @@ -554,9 +581,9 
@@ private int jjMoveStringLiteralDfa0_1() switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_1(0x40000000L); + return jjMoveStringLiteralDfa1_1(0x80000000L); case 125: - return jjStopAtPos(0, 31); + return jjStopAtPos(0, 32); default : return jjMoveNfa_1(0, 0); } @@ -571,8 +598,8 @@ private int jjMoveStringLiteralDfa1_1(long active0) switch(curChar) { case 79: - if ((active0 & 0x40000000L) != 0L) - return jjStartNfaWithStates_1(1, 30, 6); + if ((active0 & 0x80000000L) != 0L) + return jjStartNfaWithStates_1(1, 31, 6); break; default : break; @@ -608,8 +635,8 @@ private int jjMoveNfa_1(int startState, int curPos) case 0: if ((0xfffffffeffffffffL & l) != 0L) { - if (kind > 33) - kind = 33; + if (kind > 34) + kind = 34; jjCheckNAdd(6); } if ((0x100002600L & l) != 0L) @@ -626,21 +653,21 @@ private int jjMoveNfa_1(int startState, int curPos) break; case 2: if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddStates(16, 18); + jjCheckNAddStates(19, 21); break; case 3: if (curChar == 34) - jjCheckNAddStates(16, 18); + jjCheckNAddStates(19, 21); break; case 5: - if (curChar == 34 && kind > 32) - kind = 32; + if (curChar == 34 && kind > 33) + kind = 33; break; case 6: if ((0xfffffffeffffffffL & l) == 0L) break; - if (kind > 33) - kind = 33; + if (kind > 34) + kind = 34; jjCheckNAdd(6); break; default : break; @@ -658,12 +685,12 @@ private int jjMoveNfa_1(int startState, int curPos) case 6: if ((0xdfffffffffffffffL & l) == 0L) break; - if (kind > 33) - kind = 33; + if (kind > 34) + kind = 34; jjCheckNAdd(6); break; case 2: - jjAddStates(16, 18); + jjAddStates(19, 21); break; case 4: if (curChar == 92) @@ -692,20 +719,20 @@ private int jjMoveNfa_1(int startState, int curPos) } if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { - if (kind > 33) - kind = 33; + if (kind > 34) + kind = 34; jjCheckNAdd(6); } break; case 2: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjAddStates(16, 18); + jjAddStates(19, 21); break; case 6: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; - if (kind 
> 33) - kind = 33; + if (kind > 34) + kind = 34; jjCheckNAdd(6); break; default : break; @@ -750,9 +777,9 @@ private int jjMoveNfa_0(int startState, int curPos) case 0: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 25) - kind = 25; - jjAddStates(19, 20); + if (kind > 26) + kind = 26; + jjAddStates(22, 23); break; case 1: if (curChar == 46) @@ -761,8 +788,8 @@ private int jjMoveNfa_0(int startState, int curPos) case 2: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 25) - kind = 25; + if (kind > 26) + kind = 26; jjCheckNAdd(2); break; default : break; @@ -813,9 +840,9 @@ private final int jjStopStringLiteralDfa_2(int pos, long active0) switch (pos) { case 0: - if ((active0 & 0x4000000L) != 0L) + if ((active0 & 0x8000000L) != 0L) { - jjmatchedKind = 29; + jjmatchedKind = 30; return 6; } return -1; @@ -832,9 +859,9 @@ private int jjMoveStringLiteralDfa0_2() switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_2(0x4000000L); + return jjMoveStringLiteralDfa1_2(0x8000000L); case 93: - return jjStopAtPos(0, 27); + return jjStopAtPos(0, 28); default : return jjMoveNfa_2(0, 0); } @@ -849,8 +876,8 @@ private int jjMoveStringLiteralDfa1_2(long active0) switch(curChar) { case 79: - if ((active0 & 0x4000000L) != 0L) - return jjStartNfaWithStates_2(1, 26, 6); + if ((active0 & 0x8000000L) != 0L) + return jjStartNfaWithStates_2(1, 27, 6); break; default : break; @@ -886,8 +913,8 @@ private int jjMoveNfa_2(int startState, int curPos) case 0: if ((0xfffffffeffffffffL & l) != 0L) { - if (kind > 29) - kind = 29; + if (kind > 30) + kind = 30; jjCheckNAdd(6); } if ((0x100002600L & l) != 0L) @@ -904,21 +931,21 @@ private int jjMoveNfa_2(int startState, int curPos) break; case 2: if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddStates(16, 18); + jjCheckNAddStates(19, 21); break; case 3: if (curChar == 34) - jjCheckNAddStates(16, 18); + jjCheckNAddStates(19, 21); break; case 5: - if (curChar == 34 && kind > 28) - kind = 28; + if (curChar == 34 && kind > 29) + kind 
= 29; break; case 6: if ((0xfffffffeffffffffL & l) == 0L) break; - if (kind > 29) - kind = 29; + if (kind > 30) + kind = 30; jjCheckNAdd(6); break; default : break; @@ -936,12 +963,12 @@ private int jjMoveNfa_2(int startState, int curPos) case 6: if ((0xffffffffdfffffffL & l) == 0L) break; - if (kind > 29) - kind = 29; + if (kind > 30) + kind = 30; jjCheckNAdd(6); break; case 2: - jjAddStates(16, 18); + jjAddStates(19, 21); break; case 4: if (curChar == 92) @@ -970,20 +997,20 @@ private int jjMoveNfa_2(int startState, int curPos) } if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { - if (kind > 29) - kind = 29; + if (kind > 30) + kind = 30; jjCheckNAdd(6); } break; case 2: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjAddStates(16, 18); + jjAddStates(19, 21); break; case 6: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; - if (kind > 29) - kind = 29; + if (kind > 30) + kind = 30; jjCheckNAdd(6); break; default : break; @@ -1004,8 +1031,8 @@ private int jjMoveNfa_2(int startState, int curPos) } } static final int[] jjnextStates = { - 15, 16, 18, 29, 32, 23, 33, 30, 20, 21, 32, 23, 33, 31, 34, 27, - 2, 4, 5, 0, 1, + 15, 16, 18, 34, 37, 23, 38, 35, 29, 31, 32, 20, 21, 37, 23, 38, + 36, 39, 27, 2, 4, 5, 0, 1, }; private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) { @@ -1047,8 +1074,8 @@ private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, lo /** Token literal values. */ public static final String[] jjstrLiteralImages = { "", null, null, null, null, null, null, null, null, null, null, "\53", "\55", -"\50", "\51", "\72", "\52", "\136", null, null, null, null, null, "\133", "\173", -null, "\124\117", "\135", null, null, "\124\117", "\175", null, null, }; +"\50", "\51", "\72", "\52", "\136", null, null, null, null, null, null, "\133", +"\173", null, "\124\117", "\135", null, null, "\124\117", "\175", null, null, }; /** Lexer state names. 
*/ public static final String[] lexStateNames = { @@ -1060,18 +1087,18 @@ public static final String[] lexStateNames = { /** Lex State array. */ public static final int[] jjnewLexState = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, - 3, -1, 3, -1, -1, -1, 3, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, 2, + 1, 3, -1, 3, -1, -1, -1, 3, -1, -1, }; static final long[] jjtoToken = { - 0x3ffffff01L, + 0x7ffffff01L, }; static final long[] jjtoSkip = { 0x80L, }; protected CharStream input_stream; -private final int[] jjrounds = new int[36]; -private final int[] jjstateSet = new int[72]; +private final int[] jjrounds = new int[41]; +private final int[] jjstateSet = new int[82]; protected char curChar; /** Constructor. */ public QueryParserTokenManager(CharStream stream){ @@ -1096,7 +1123,7 @@ private void ReInitRounds() { int i; jjround = 0x80000001; - for (i = 36; i-- > 0;) + for (i = 41; i-- > 0;) jjrounds[i] = 0x80000000; } diff --git a/lucene/src/java/org/apache/lucene/queryParser/Token.java b/lucene/src/java/org/apache/lucene/queryParser/Token.java index 2c665d6ab22..97677981cd7 100644 --- a/lucene/src/java/org/apache/lucene/queryParser/Token.java +++ b/lucene/src/java/org/apache/lucene/queryParser/Token.java @@ -121,4 +121,4 @@ public class Token { } } -/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */ +/* JavaCC - OriginalChecksum=37b1923f964a5a434f5ea3d6952ff200 (do not edit this line) */ diff --git a/lucene/src/java/org/apache/lucene/queryParser/TokenMgrError.java b/lucene/src/java/org/apache/lucene/queryParser/TokenMgrError.java index b4ffd429b2b..a3c46b70cab 100644 --- a/lucene/src/java/org/apache/lucene/queryParser/TokenMgrError.java +++ b/lucene/src/java/org/apache/lucene/queryParser/TokenMgrError.java @@ -138,4 +138,4 @@ public class TokenMgrError extends Error this(LexicalError(EOFSeen, lexState, 
errorLine, errorColumn, errorAfter, curChar), reason); } } -/* JavaCC - OriginalChecksum=1c94e13236c7e0121e49427992341ee3 (do not edit this line) */ +/* JavaCC - OriginalChecksum=334e679cf1a88b3070bb8e3d80ee3f5e (do not edit this line) */ diff --git a/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java b/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java index 6cc988f8533..7cf3688b792 100644 --- a/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java +++ b/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java @@ -54,10 +54,12 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LocalizedTestCase; import org.apache.lucene.util.automaton.BasicAutomata; @@ -1054,6 +1056,33 @@ public class TestQueryParser extends LocalizedTestCase { } + public void testRegexps() throws Exception { + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); + RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]")); + assertEquals(q, qp.parse("/[a-z][123]/")); + qp.setLowercaseExpandedTerms(true); + assertEquals(q, qp.parse("/[A-Z][123]/")); + q.setBoost(0.5f); + assertEquals(q, qp.parse("/[A-Z][123]/^0.5")); + qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + assertTrue(qp.parse("/[A-Z][123]/^0.5") instanceof RegexpQuery); + assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, 
((RegexpQuery)qp.parse("/[A-Z][123]/^0.5")).getRewriteMethod()); + assertEquals(q, qp.parse("/[A-Z][123]/^0.5")); + qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); + + Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]")); + assertEquals(escaped, qp.parse("/[a-z]\\/[123]/")); + Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]")); + assertEquals(escaped2, qp.parse("/[a-z]\\*[123]/")); + + BooleanQuery complex = new BooleanQuery(); + complex.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST); + complex.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST); + complex.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD); + assertEquals(complex, qp.parse("/[a-z]\\/[123]/ AND path:/etc/init.d/ OR /etc\\/init\\[.\\]d/lucene/ ")); + } + public void testStopwords() throws Exception { CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton()); QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true));