LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions are now directly supported by the standard QueryParser.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@990836 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2010-08-30 16:04:49 +00:00
parent c5d2165437
commit 838e19beae
34 changed files with 984 additions and 384 deletions

View File

@ -131,6 +131,10 @@ API Changes
New features
* LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions
are directly supported by the standard queryparser.
(Simon Willnauer, Robert Muir)
* LUCENE-1606, LUCENE-2089: Adds AutomatonQuery, a MultiTermQuery that
matches terms against a finite-state machine. Implement WildcardQuery
and FuzzyQuery with finite-state methods. Adds RegexpQuery.

View File

@ -9,6 +9,9 @@ Build
New Features
* LUCENE-2604: Added RegexpQuery support to contrib/queryparser.
(Simon Willnauer, Robert Muir)
* LUCENE-2500: Added DirectIOLinuxDirectory, a Linux-specific
Directory impl that uses the O_DIRECT flag to bypass the buffer
cache. This is useful to prevent segment merging from evicting

View File

@ -109,4 +109,4 @@ public interface CharStream {
void Done();
}
/* JavaCC - OriginalChecksum=8cc617b193267dc876ef9699367c8186 (do not edit this line) */
/* JavaCC - OriginalChecksum=7bcd45d10a032f1c9da64691d073cf75 (do not edit this line) */

View File

@ -195,4 +195,4 @@ public class ParseException extends Exception {
}
}
/* JavaCC - OriginalChecksum=15fbbe38a36c8ac9e2740d030624c321 (do not edit this line) */
/* JavaCC - OriginalChecksum=4440e368eeef562faffeca98a200334b (do not edit this line) */

View File

@ -17,9 +17,11 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
@ -95,6 +97,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
Analyzer analyzer;
String field;
@ -232,6 +235,27 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
public boolean getLowercaseExpandedTerms() {
return lowercaseExpandedTerms;
}
/**
 * Selects the rewrite method installed on the multi-term queries this parser
 * builds (range, wildcard, prefix and regular expression queries).
 * <p>
 * The default is {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT},
 * which is generally preferable because it a) runs faster, b) does not let
 * the scarcity of terms unduly influence the score and c) avoids any
 * "TooManyBooleanClauses" exception. Change it only if your application
 * really needs the old-fashioned BooleanQuery expansion rewriting and the
 * points above are not relevant.
 *
 * @param method rewrite method applied to queries created after this call
 * @see #getMultiTermRewriteMethod
 */
public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
  this.multiTermRewriteMethod = method;
}
/**
 * Returns the rewrite method currently configured for multi-term queries.
 *
 * @return the value last passed to {@link #setMultiTermRewriteMethod}, or the
 *         initial default if it was never called
 * @see #setMultiTermRewriteMethod
 */
public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
  return this.multiTermRewriteMethod;
}
/**
* Set locale used by date range parsing.
@ -426,7 +450,9 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
}
catch (Exception e) { }
return new TermRangeQuery(field, part1, part2, inclusive, inclusive);
final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
/**
@ -500,7 +526,9 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr);
return new WildcardQuery(t);
final WildcardQuery query = new WildcardQuery(t);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
/**
@ -532,7 +560,40 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr);
return new PrefixQuery(t);
final PrefixQuery query = new PrefixQuery(t);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
/**
 * Factory method that builds the query for a regular expression term.
 * Called when the parser encounters a term token containing a regular
 * expression.
 * <p>
 * If expanded terms are configured to be lower-cased, the whole pattern is
 * lower-cased before the query is built. The pattern never goes through the
 * Analyzer, since normal Analyzers are unlikely to work properly with
 * regular expression templates.
 * <p>
 * Subclasses may override this to customize handling of regular expression
 * queries, which may be necessary precisely because no analyzer is applied.
 *
 * @param field   name of the field the query will run against
 * @param termStr regular expression text taken from the term token
 * @return the {@link Query} built for the term, carrying the configured
 *         multi-term rewrite method
 * @throws ParseException never thrown here; an overriding method may throw
 *         it to disallow regular expression queries
 */
protected Query getRegexpQuery(String field, String termStr) throws ParseException {
  // Lower-casing is applied to the raw pattern text as a whole.
  final String pattern = lowercaseExpandedTerms ? termStr.toLowerCase() : termStr;
  final RegexpQuery regexpQuery = new RegexpQuery(new Term(field, pattern));
  regexpQuery.setRewriteMethod(multiTermRewriteMethod);
  return regexpQuery;
}
/**
@ -675,6 +736,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
case TERM:
case PREFIXTERM:
case WILDTERM:
case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
@ -750,6 +812,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
case TERM:
case PREFIXTERM:
case WILDTERM:
case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
@ -790,11 +853,14 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
boolean regexp = false;
Query q;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
case PREFIXTERM:
case WILDTERM:
case REGEXPTERM:
case NUMBER:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
@ -808,6 +874,10 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
term = jj_consume_token(WILDTERM);
wildcard=true;
break;
case REGEXPTERM:
term = jj_consume_token(REGEXPTERM);
regexp=true;
break;
case NUMBER:
term = jj_consume_token(NUMBER);
break;
@ -850,6 +920,8 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
q = getPrefixQuery(field,
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
} else if (regexp) {
q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1));
} else if (fuzzy) {
float fms = fuzzyMinSim;
try {
@ -1055,11 +1127,16 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
private int jj_gen;
final private int[] jj_la1 = new int[24];
static private int[] jj_la1_0;
static private int[] jj_la1_1;
static {
jj_la1_init_0();
jj_la1_init_1();
}
private static void jj_la1_init_0() {
jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0xfb1f00,0x100,0x80,0x8000,0xfb1000,0x9a0000,0x40000,0x40000,0x8000,0xc000000,0x1000000,0xc000000,0x8000,0xc0000000,0x10000000,0xc0000000,0x8000,0x40000,0x8000,0xfb0000,};
jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0x1fb1f00,0x100,0x80,0x8000,0x1fb1000,0x13a0000,0x40000,0x40000,0x8000,0x18000000,0x2000000,0x18000000,0x8000,0x80000000,0x20000000,0x80000000,0x8000,0x40000,0x8000,0x1fb0000,};
}
// Fills jj_la1_1, the second word of lookahead bit masks, with one entry per
// jj_la1 slot (24 in total). generateParseException() maps bit j of an entry
// to token kind 32+j, so the 0x1 entries flag token kind 32 as an expected
// token. NOTE(review): this looks like JavaCC-generated output - regenerate
// from the grammar rather than editing the table by hand.
private static void jj_la1_init_1() {
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,};
}
final private JJCalls[] jj_2_rtns = new JJCalls[1];
private boolean jj_rescan = false;
@ -1213,7 +1290,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
/** Generate ParseException. */
public ParseException generateParseException() {
jj_expentries.clear();
boolean[] la1tokens = new boolean[32];
boolean[] la1tokens = new boolean[33];
if (jj_kind >= 0) {
la1tokens[jj_kind] = true;
jj_kind = -1;
@ -1224,10 +1301,13 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
if ((jj_la1_0[i] & (1<<j)) != 0) {
la1tokens[j] = true;
}
if ((jj_la1_1[i] & (1<<j)) != 0) {
la1tokens[32+j] = true;
}
}
}
}
for (int i = 0; i < 32; i++) {
for (int i = 0; i < 33; i++) {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;

View File

@ -41,9 +41,11 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
@ -119,6 +121,7 @@ public class PrecedenceQueryParser {
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
Analyzer analyzer;
String field;
@ -256,6 +259,27 @@ public class PrecedenceQueryParser {
public boolean getLowercaseExpandedTerms() {
return lowercaseExpandedTerms;
}
/**
 * Selects the rewrite method installed on the multi-term queries this parser
 * builds (range, wildcard, prefix and regular expression queries).
 * <p>
 * The default is {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT},
 * which is generally preferable because it a) runs faster, b) does not let
 * the scarcity of terms unduly influence the score and c) avoids any
 * "TooManyBooleanClauses" exception. Change it only if your application
 * really needs the old-fashioned BooleanQuery expansion rewriting and the
 * points above are not relevant.
 *
 * @param method rewrite method applied to queries created after this call
 * @see #getMultiTermRewriteMethod
 */
public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
  this.multiTermRewriteMethod = method;
}
/**
 * Returns the rewrite method currently configured for multi-term queries.
 *
 * @return the value last passed to {@link #setMultiTermRewriteMethod}, or the
 *         initial default if it was never called
 * @see #setMultiTermRewriteMethod
 */
public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
  return this.multiTermRewriteMethod;
}
/**
* Set locale used by date range parsing.
@ -450,7 +474,9 @@ public class PrecedenceQueryParser {
}
catch (Exception e) { }
return new TermRangeQuery(field, part1, part2, inclusive, inclusive);
final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
/**
@ -524,7 +550,9 @@ public class PrecedenceQueryParser {
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr);
return new WildcardQuery(t);
final WildcardQuery query = new WildcardQuery(t);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
/**
@ -556,7 +584,40 @@ public class PrecedenceQueryParser {
termStr = termStr.toLowerCase();
}
Term t = new Term(field, termStr);
return new PrefixQuery(t);
final PrefixQuery query = new PrefixQuery(t);
query.setRewriteMethod(multiTermRewriteMethod);
return query;
}
/**
 * Factory method that builds the query for a regular expression term.
 * Called when the parser encounters a term token containing a regular
 * expression.
 * <p>
 * If expanded terms are configured to be lower-cased, the whole pattern is
 * lower-cased before the query is built. The pattern never goes through the
 * Analyzer, since normal Analyzers are unlikely to work properly with
 * regular expression templates.
 * <p>
 * Subclasses may override this to customize handling of regular expression
 * queries, which may be necessary precisely because no analyzer is applied.
 *
 * @param field   name of the field the query will run against
 * @param termStr regular expression text taken from the term token
 * @return the {@link Query} built for the term, carrying the configured
 *         multi-term rewrite method
 * @throws ParseException never thrown here; an overriding method may throw
 *         it to disallow regular expression queries
 */
protected Query getRegexpQuery(String field, String termStr) throws ParseException {
  // Lower-casing is applied to the raw pattern text as a whole.
  final String pattern = lowercaseExpandedTerms ? termStr.toLowerCase() : termStr;
  final RegexpQuery regexpQuery = new RegexpQuery(new Term(field, pattern));
  regexpQuery.setRewriteMethod(multiTermRewriteMethod);
  return regexpQuery;
}
/**
@ -678,6 +739,7 @@ PARSER_END(PrecedenceQueryParser)
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
| <WILDTERM: <_TERM_START_CHAR>
(<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}
@ -813,6 +875,8 @@ Query Term(String field) : {
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
boolean regexp = false;
Query q;
}
{
@ -821,6 +885,7 @@ Query Term(String field) : {
term=<TERM>
| term=<PREFIXTERM> { prefix=true; }
| term=<WILDTERM> { wildcard=true; }
| term=<REGEXPTERM> { regexp=true; }
| term=<NUMBER>
)
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
@ -833,6 +898,8 @@ Query Term(String field) : {
q = getPrefixQuery(field,
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
} else if (regexp) {
q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1));
} else if (fuzzy) {
float fms = fuzzyMinSim;
try {

View File

@ -49,27 +49,29 @@ public interface PrecedenceQueryParserConstants {
/** RegularExpression Id. */
int WILDTERM = 20;
/** RegularExpression Id. */
int RANGEIN_START = 21;
int REGEXPTERM = 21;
/** RegularExpression Id. */
int RANGEEX_START = 22;
int RANGEIN_START = 22;
/** RegularExpression Id. */
int NUMBER = 23;
int RANGEEX_START = 23;
/** RegularExpression Id. */
int RANGEIN_TO = 24;
int NUMBER = 24;
/** RegularExpression Id. */
int RANGEIN_END = 25;
int RANGEIN_TO = 25;
/** RegularExpression Id. */
int RANGEIN_QUOTED = 26;
int RANGEIN_END = 26;
/** RegularExpression Id. */
int RANGEIN_GOOP = 27;
int RANGEIN_QUOTED = 27;
/** RegularExpression Id. */
int RANGEEX_TO = 28;
int RANGEIN_GOOP = 28;
/** RegularExpression Id. */
int RANGEEX_END = 29;
int RANGEEX_TO = 29;
/** RegularExpression Id. */
int RANGEEX_QUOTED = 30;
int RANGEEX_END = 30;
/** RegularExpression Id. */
int RANGEEX_GOOP = 31;
int RANGEEX_QUOTED = 31;
/** RegularExpression Id. */
int RANGEEX_GOOP = 32;
/** Lexical state. */
int Boost = 0;
@ -103,6 +105,7 @@ public interface PrecedenceQueryParserConstants {
"<FUZZY_SLOP>",
"<PREFIXTERM>",
"<WILDTERM>",
"<REGEXPTERM>",
"\"[\"",
"\"{\"",
"<NUMBER>",

View File

@ -15,9 +15,11 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
@ -66,11 +68,11 @@ private int jjMoveStringLiteralDfa0_3()
case 58:
return jjStopAtPos(0, 14);
case 91:
return jjStopAtPos(0, 21);
return jjStopAtPos(0, 22);
case 94:
return jjStopAtPos(0, 15);
case 123:
return jjStopAtPos(0, 22);
return jjStopAtPos(0, 23);
default :
return jjMoveNfa_3(0, 0);
}
@ -84,7 +86,7 @@ static final long[] jjbitVec2 = {
private int jjMoveNfa_3(int startState, int curPos)
{
int startsAt = 0;
jjnewStateCnt = 33;
jjnewStateCnt = 38;
int i = 1;
jjstateSet[0] = startState;
int kind = 0x7fffffff;
@ -118,7 +120,9 @@ private int jjMoveNfa_3(int startState, int curPos)
if (kind > 9)
kind = 9;
}
if (curChar == 38)
if (curChar == 47)
jjCheckNAddStates(7, 9);
else if (curChar == 38)
jjstateSet[jjnewStateCnt++] = 4;
break;
case 4:
@ -150,7 +154,7 @@ private int jjMoveNfa_3(int startState, int curPos)
break;
if (kind > 18)
kind = 18;
jjAddStates(7, 8);
jjAddStates(10, 11);
break;
case 19:
if (curChar == 46)
@ -164,51 +168,64 @@ private int jjMoveNfa_3(int startState, int curPos)
jjCheckNAdd(20);
break;
case 21:
case 23:
if (curChar == 47)
jjCheckNAddStates(7, 9);
break;
case 22:
if ((0xffff7fffffffffffL & l) != 0L)
jjCheckNAddStates(7, 9);
break;
case 25:
if (curChar == 47 && kind > 21)
kind = 21;
break;
case 26:
if ((0x7bffd0f8ffffd9ffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddStates(0, 6);
break;
case 22:
case 27:
if ((0x7bfff8f8ffffd9ffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddTwoStates(22, 23);
jjCheckNAddTwoStates(27, 28);
break;
case 24:
case 29:
if ((0x84002f0600000000L & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddTwoStates(22, 23);
jjCheckNAddTwoStates(27, 28);
break;
case 25:
case 30:
if ((0x7bfff8f8ffffd9ffL & l) != 0L)
jjCheckNAddStates(9, 11);
jjCheckNAddStates(12, 14);
break;
case 26:
case 31:
if (curChar == 42 && kind > 19)
kind = 19;
break;
case 28:
case 33:
if ((0x84002f0600000000L & l) != 0L)
jjCheckNAddStates(9, 11);
jjCheckNAddStates(12, 14);
break;
case 29:
case 34:
if ((0xfbfffcf8ffffd9ffL & l) == 0L)
break;
if (kind > 20)
kind = 20;
jjCheckNAddTwoStates(29, 30);
jjCheckNAddTwoStates(34, 35);
break;
case 31:
case 36:
if ((0x84002f0600000000L & l) == 0L)
break;
if (kind > 20)
kind = 20;
jjCheckNAddTwoStates(29, 30);
jjCheckNAddTwoStates(34, 35);
break;
default : break;
}
@ -235,7 +252,7 @@ private int jjMoveNfa_3(int startState, int curPos)
jjstateSet[jjnewStateCnt++] = 18;
}
if (curChar == 92)
jjCheckNAddStates(12, 14);
jjCheckNAddStates(15, 17);
else if (curChar == 78)
jjstateSet[jjnewStateCnt++] = 11;
else if (curChar == 124)
@ -286,7 +303,7 @@ private int jjMoveNfa_3(int startState, int curPos)
jjstateSet[jjnewStateCnt++] = 11;
break;
case 15:
jjAddStates(15, 16);
jjAddStates(18, 19);
break;
case 17:
if (curChar != 126)
@ -295,65 +312,72 @@ private int jjMoveNfa_3(int startState, int curPos)
kind = 18;
jjstateSet[jjnewStateCnt++] = 18;
break;
case 21:
case 22:
jjAddStates(7, 9);
break;
case 24:
if (curChar == 92)
jjstateSet[jjnewStateCnt++] = 23;
break;
case 26:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddStates(0, 6);
break;
case 22:
case 27:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddTwoStates(22, 23);
break;
case 23:
if (curChar == 92)
jjCheckNAddTwoStates(24, 24);
break;
case 24:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddTwoStates(22, 23);
break;
case 25:
if ((0x97ffffff97ffffffL & l) != 0L)
jjCheckNAddStates(9, 11);
break;
case 27:
if (curChar == 92)
jjCheckNAddTwoStates(28, 28);
jjCheckNAddTwoStates(27, 28);
break;
case 28:
if ((0x6800000078000000L & l) != 0L)
jjCheckNAddStates(9, 11);
if (curChar == 92)
jjCheckNAddTwoStates(29, 29);
break;
case 29:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 20)
kind = 20;
jjCheckNAddTwoStates(29, 30);
break;
case 30:
if (curChar == 92)
jjCheckNAddTwoStates(31, 31);
break;
case 31:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 20)
kind = 20;
jjCheckNAddTwoStates(29, 30);
if (kind > 17)
kind = 17;
jjCheckNAddTwoStates(27, 28);
break;
case 30:
if ((0x97ffffff97ffffffL & l) != 0L)
jjCheckNAddStates(12, 14);
break;
case 32:
if (curChar == 92)
jjCheckNAddTwoStates(33, 33);
break;
case 33:
if ((0x6800000078000000L & l) != 0L)
jjCheckNAddStates(12, 14);
break;
case 34:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 20)
kind = 20;
jjCheckNAddTwoStates(34, 35);
break;
case 35:
if (curChar == 92)
jjCheckNAddTwoStates(36, 36);
break;
case 36:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 20)
kind = 20;
jjCheckNAddTwoStates(34, 35);
break;
case 37:
if (curChar == 92)
jjCheckNAddStates(15, 17);
break;
default : break;
}
} while(i != startsAt);
@ -378,25 +402,29 @@ private int jjMoveNfa_3(int startState, int curPos)
break;
case 15:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
jjAddStates(15, 16);
jjAddStates(18, 19);
break;
case 22:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
jjAddStates(7, 9);
break;
case 27:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 17)
kind = 17;
jjCheckNAddTwoStates(22, 23);
jjCheckNAddTwoStates(27, 28);
break;
case 25:
case 30:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
jjCheckNAddStates(9, 11);
jjCheckNAddStates(12, 14);
break;
case 29:
case 34:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 20)
kind = 20;
jjCheckNAddTwoStates(29, 30);
jjCheckNAddTwoStates(34, 35);
break;
default : break;
}
@ -409,7 +437,7 @@ private int jjMoveNfa_3(int startState, int curPos)
kind = 0x7fffffff;
}
++curPos;
if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt)))
if ((i = jjnewStateCnt) == (startsAt = 38 - (jjnewStateCnt = startsAt)))
return curPos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return curPos; }
@ -420,9 +448,9 @@ private final int jjStopStringLiteralDfa_1(int pos, long active0)
switch (pos)
{
case 0:
if ((active0 & 0x10000000L) != 0L)
if ((active0 & 0x20000000L) != 0L)
{
jjmatchedKind = 31;
jjmatchedKind = 32;
return 4;
}
return -1;
@ -439,9 +467,9 @@ private int jjMoveStringLiteralDfa0_1()
switch(curChar)
{
case 84:
return jjMoveStringLiteralDfa1_1(0x10000000L);
return jjMoveStringLiteralDfa1_1(0x20000000L);
case 125:
return jjStopAtPos(0, 29);
return jjStopAtPos(0, 30);
default :
return jjMoveNfa_1(0, 0);
}
@ -456,8 +484,8 @@ private int jjMoveStringLiteralDfa1_1(long active0)
switch(curChar)
{
case 79:
if ((active0 & 0x10000000L) != 0L)
return jjStartNfaWithStates_1(1, 28, 4);
if ((active0 & 0x20000000L) != 0L)
return jjStartNfaWithStates_1(1, 29, 4);
break;
default :
break;
@ -493,8 +521,8 @@ private int jjMoveNfa_1(int startState, int curPos)
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
if (kind > 31)
kind = 31;
if (kind > 32)
kind = 32;
jjCheckNAdd(4);
}
if ((0x100002600L & l) != 0L)
@ -514,14 +542,14 @@ private int jjMoveNfa_1(int startState, int curPos)
jjCheckNAddTwoStates(2, 3);
break;
case 3:
if (curChar == 34 && kind > 30)
kind = 30;
if (curChar == 34 && kind > 31)
kind = 31;
break;
case 4:
if ((0xfffffffeffffffffL & l) == 0L)
break;
if (kind > 31)
kind = 31;
if (kind > 32)
kind = 32;
jjCheckNAdd(4);
break;
default : break;
@ -539,12 +567,12 @@ private int jjMoveNfa_1(int startState, int curPos)
case 4:
if ((0xdfffffffffffffffL & l) == 0L)
break;
if (kind > 31)
kind = 31;
if (kind > 32)
kind = 32;
jjCheckNAdd(4);
break;
case 2:
jjAddStates(17, 18);
jjAddStates(20, 21);
break;
default : break;
}
@ -565,13 +593,13 @@ private int jjMoveNfa_1(int startState, int curPos)
case 4:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 31)
kind = 31;
if (kind > 32)
kind = 32;
jjCheckNAdd(4);
break;
case 2:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
jjAddStates(17, 18);
jjAddStates(20, 21);
break;
default : break;
}
@ -615,9 +643,9 @@ private int jjMoveNfa_0(int startState, int curPos)
case 0:
if ((0x3ff000000000000L & l) == 0L)
break;
if (kind > 23)
kind = 23;
jjAddStates(19, 20);
if (kind > 24)
kind = 24;
jjAddStates(22, 23);
break;
case 1:
if (curChar == 46)
@ -626,8 +654,8 @@ private int jjMoveNfa_0(int startState, int curPos)
case 2:
if ((0x3ff000000000000L & l) == 0L)
break;
if (kind > 23)
kind = 23;
if (kind > 24)
kind = 24;
jjCheckNAdd(2);
break;
default : break;
@ -678,9 +706,9 @@ private final int jjStopStringLiteralDfa_2(int pos, long active0)
switch (pos)
{
case 0:
if ((active0 & 0x1000000L) != 0L)
if ((active0 & 0x2000000L) != 0L)
{
jjmatchedKind = 27;
jjmatchedKind = 28;
return 4;
}
return -1;
@ -697,9 +725,9 @@ private int jjMoveStringLiteralDfa0_2()
switch(curChar)
{
case 84:
return jjMoveStringLiteralDfa1_2(0x1000000L);
return jjMoveStringLiteralDfa1_2(0x2000000L);
case 93:
return jjStopAtPos(0, 25);
return jjStopAtPos(0, 26);
default :
return jjMoveNfa_2(0, 0);
}
@ -714,8 +742,8 @@ private int jjMoveStringLiteralDfa1_2(long active0)
switch(curChar)
{
case 79:
if ((active0 & 0x1000000L) != 0L)
return jjStartNfaWithStates_2(1, 24, 4);
if ((active0 & 0x2000000L) != 0L)
return jjStartNfaWithStates_2(1, 25, 4);
break;
default :
break;
@ -751,8 +779,8 @@ private int jjMoveNfa_2(int startState, int curPos)
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
if (kind > 27)
kind = 27;
if (kind > 28)
kind = 28;
jjCheckNAdd(4);
}
if ((0x100002600L & l) != 0L)
@ -772,14 +800,14 @@ private int jjMoveNfa_2(int startState, int curPos)
jjCheckNAddTwoStates(2, 3);
break;
case 3:
if (curChar == 34 && kind > 26)
kind = 26;
if (curChar == 34 && kind > 27)
kind = 27;
break;
case 4:
if ((0xfffffffeffffffffL & l) == 0L)
break;
if (kind > 27)
kind = 27;
if (kind > 28)
kind = 28;
jjCheckNAdd(4);
break;
default : break;
@ -797,12 +825,12 @@ private int jjMoveNfa_2(int startState, int curPos)
case 4:
if ((0xffffffffdfffffffL & l) == 0L)
break;
if (kind > 27)
kind = 27;
if (kind > 28)
kind = 28;
jjCheckNAdd(4);
break;
case 2:
jjAddStates(17, 18);
jjAddStates(20, 21);
break;
default : break;
}
@ -823,13 +851,13 @@ private int jjMoveNfa_2(int startState, int curPos)
case 4:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 27)
kind = 27;
if (kind > 28)
kind = 28;
jjCheckNAdd(4);
break;
case 2:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
jjAddStates(17, 18);
jjAddStates(20, 21);
break;
default : break;
}
@ -849,8 +877,8 @@ private int jjMoveNfa_2(int startState, int curPos)
}
}
static final int[] jjnextStates = {
22, 25, 26, 29, 30, 27, 23, 18, 19, 25, 26, 27, 24, 28, 31, 15,
16, 2, 3, 0, 1,
27, 30, 31, 34, 35, 32, 28, 22, 24, 25, 18, 19, 30, 31, 32, 29,
33, 36, 15, 16, 2, 3, 0, 1,
};
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
{
@ -868,8 +896,8 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo
/** Token literal values. */
public static final String[] jjstrLiteralImages = {
"", null, null, null, null, null, null, null, null, null, "\53", "\55", "\50",
"\51", "\72", "\136", null, null, null, null, null, "\133", "\173", null, "\124\117",
"\135", null, null, "\124\117", "\175", null, null, };
"\51", "\72", "\136", null, null, null, null, null, null, "\133", "\173", null,
"\124\117", "\135", null, null, "\124\117", "\175", null, null, };
/** Lexer state names. */
public static final String[] lexStateNames = {
@ -881,18 +909,18 @@ public static final String[] lexStateNames = {
/** Lex State array. */
public static final int[] jjnewLexState = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 3, -1,
3, -1, -1, -1, 3, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, 2, 1, 3,
-1, 3, -1, -1, -1, 3, -1, -1,
};
static final long[] jjtoToken = {
0xffffff81L,
0x1ffffff81L,
};
static final long[] jjtoSkip = {
0x40L,
};
protected CharStream input_stream;
private final int[] jjrounds = new int[33];
private final int[] jjstateSet = new int[66];
private final int[] jjrounds = new int[38];
private final int[] jjstateSet = new int[76];
protected char curChar;
/** Constructor. */
public PrecedenceQueryParserTokenManager(CharStream stream){
@ -917,7 +945,7 @@ private void ReInitRounds()
{
int i;
jjround = 0x80000001;
for (i = 33; i-- > 0;)
for (i = 38; i-- > 0;)
jjrounds[i] = 0x80000000;
}

View File

@ -121,4 +121,4 @@ public class Token {
}
}
/* JavaCC - OriginalChecksum=0dc5808f2ab8aac8775ea9175fa2cb51 (do not edit this line) */
/* JavaCC - OriginalChecksum=bc9495ddfa3189061fb4f1bf3c4f64e2 (do not edit this line) */

View File

@ -138,4 +138,4 @@ public class TokenMgrError extends Error
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
/* JavaCC - OriginalChecksum=257b82f2650841e86289a309cb3dae76 (do not edit this line) */
/* JavaCC - OriginalChecksum=e01667f2eb6d0b2f1fbb6958df0ca751 (do not edit this line) */

View File

@ -0,0 +1,52 @@
package org.apache.lucene.queryParser.standard.builders;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute;
import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.RegexpQuery;
/**
 * Query builder that converts a {@link RegexpQueryNode} into a
 * {@link RegexpQuery}.
 */
public class RegexpQueryNodeBuilder implements StandardQueryBuilder {

  public RegexpQueryNodeBuilder() {
    // empty constructor
  }

  /**
   * Creates the {@link RegexpQuery} for the given node from its field name
   * and regular expression text. If the node carries a rewrite method under
   * {@link MultiTermRewriteMethodAttribute#TAG_ID}, that method is installed
   * on the query; otherwise the query is returned as constructed.
   *
   * @param queryNode node to convert; it is cast to {@link RegexpQueryNode}
   * @return the query built from the node
   * @throws QueryNodeException declared by the builder contract; not thrown
   *         by this implementation itself
   */
  public RegexpQuery build(QueryNode queryNode) throws QueryNodeException {
    final RegexpQueryNode node = (RegexpQueryNode) queryNode;
    final Term pattern = new Term(node.getFieldAsString(), node.textToBytesRef());
    final RegexpQuery query = new RegexpQuery(pattern);

    final Object tag = queryNode.getTag(MultiTermRewriteMethodAttribute.TAG_ID);
    if (tag == null) {
      return query;
    }
    query.setRewriteMethod((MultiTermQuery.RewriteMethod) tag);
    return query;
  }
}

View File

@ -33,6 +33,7 @@ import org.apache.lucene.queryParser.core.nodes.TokenizedPhraseQueryNode;
import org.apache.lucene.queryParser.standard.nodes.MultiPhraseQueryNode;
import org.apache.lucene.queryParser.standard.nodes.PrefixWildcardQueryNode;
import org.apache.lucene.queryParser.standard.nodes.RangeQueryNode;
import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryParser.standard.nodes.StandardBooleanQueryNode;
import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;
import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline;
@ -63,6 +64,7 @@ public class StandardQueryTreeBuilder extends QueryTreeBuilder implements
setBuilder(PrefixWildcardQueryNode.class,
new PrefixWildcardQueryNodeBuilder());
setBuilder(RangeQueryNode.class, new RangeQueryNodeBuilder());
setBuilder(RegexpQueryNode.class, new RegexpQueryNodeBuilder());
setBuilder(SlopQueryNode.class, new SlopQueryNodeBuilder());
setBuilder(StandardBooleanQueryNode.class,
new StandardBooleanQueryNodeBuilder());

View File

@ -0,0 +1,92 @@
package org.apache.lucene.queryParser.standard.nodes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.queryParser.core.nodes.FieldableNode;
import org.apache.lucene.queryParser.core.nodes.QueryNodeImpl;
import org.apache.lucene.queryParser.core.nodes.TextableQueryNode;
import org.apache.lucene.queryParser.core.parser.EscapeQuerySyntax;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.util.BytesRef;
/**
 * A {@link RegexpQueryNode} represents a {@link RegexpQuery} query.
 * Example: <code>/[a-z]|[0-9]/</code>
 */
public class RegexpQueryNode extends QueryNodeImpl implements TextableQueryNode,
    FieldableNode {

  private static final long serialVersionUID = 0L;

  /** The regular expression, without any surrounding delimiters. */
  private CharSequence text;

  /** The field the expression is matched against; may be unset. */
  private CharSequence field;

  /**
   * Creates a node whose text is <code>text.subSequence(begin, end)</code>.
   * Note that <code>begin</code> and <code>end</code> are therefore offsets
   * into <code>text</code> itself, not positions in the enclosing query
   * string.
   *
   * @param field
   *          - field name
   * @param text
   *          - value that contains a regular expression
   * @param begin
   *          - start offset (inclusive) of the expression within
   *          <code>text</code>
   * @param end
   *          - end offset (exclusive) of the expression within
   *          <code>text</code>
   */
  public RegexpQueryNode(CharSequence field, CharSequence text, int begin,
      int end) {
    this.field = field;
    this.text = text.subSequence(begin, end);
  }

  /**
   * @return a new {@link BytesRef} created from the current text
   */
  public BytesRef textToBytesRef() {
    return new BytesRef(text);
  }

  @Override
  public String toString() {
    return "<regexp field='" + this.field + "' term='" + this.text + "'/>";
  }

  @Override
  public RegexpQueryNode cloneTree() throws CloneNotSupportedException {
    RegexpQueryNode clone = (RegexpQueryNode) super.cloneTree();
    // carry field and text over to the copy explicitly
    clone.field = this.field;
    clone.text = this.text;

    return clone;
  }

  /**
   * @return the regular expression held by this node
   */
  public CharSequence getText() {
    return text;
  }

  /**
   * @param text
   *          - the new regular expression; stored as given
   */
  public void setText(CharSequence text) {
    this.text = text;
  }

  /**
   * @return the field name, possibly <code>null</code>
   */
  public CharSequence getField() {
    return field;
  }

  /**
   * @return the field name as a {@link String}, or <code>null</code> when no
   *         field has been set (instead of failing with a
   *         {@link NullPointerException})
   */
  public String getFieldAsString() {
    return field == null ? null : field.toString();
  }

  /**
   * @param field
   *          - the new field name
   */
  public void setField(CharSequence field) {
    this.field = field;
  }

  /**
   * Renders this node in query syntax: <code>/text/</code> when
   * <code>isDefaultField(field)</code> holds, <code>field:/text/</code>
   * otherwise. The text is emitted as stored; the given escaper is not
   * applied to it.
   *
   * @param escapeSyntaxParser
   *          - unused by this implementation
   */
  public CharSequence toQueryString(EscapeQuerySyntax escapeSyntaxParser) {
    return isDefaultField(field)? "/"+text+"/": field + ":/" + text + "/";
  }

}

View File

@ -613,4 +613,4 @@ public class JavaCharStream
}
}
/* JavaCC - OriginalChecksum=f19c73b8f7faf94cc4a581e7b2933cc6 (do not edit this line) */
/* JavaCC - OriginalChecksum=31519f95b41182c6740c2afd8dfbf344 (do not edit this line) */

View File

@ -193,4 +193,4 @@ public class ParseException extends QueryNodeParseException {
}
}
/* JavaCC - OriginalChecksum=38bce846fe6c8482993969f741c0323e (do not edit this line) */
/* JavaCC - OriginalChecksum=d0caeac083e9874065f9d1e298b5ccd9 (do not edit this line) */

View File

@ -40,6 +40,7 @@ import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode;
import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryParser.core.nodes.SlopQueryNode;
import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
@ -178,6 +179,7 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC
case LPAREN:
case QUOTED:
case TERM:
case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
@ -326,6 +328,7 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case QUOTED:
case TERM:
case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
@ -373,17 +376,23 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC
final public QueryNode Term(CharSequence field) throws ParseException {
Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean fuzzy = false;
boolean regexp = false;
QueryNode q =null;
ParametricQueryNode qLower, qUpper;
float defaultMinSimilarity = 0.5f;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
case REGEXPTERM:
case NUMBER:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
term = jj_consume_token(TERM);
q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn);
break;
case REGEXPTERM:
term = jj_consume_token(REGEXPTERM);
regexp=true;
break;
case NUMBER:
term = jj_consume_token(NUMBER);
break;
@ -428,6 +437,8 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC
{if (true) throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));}
}
q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
} else if (regexp) {
q = new RegexpQueryNode(field, term.image, term.beginColumn, term.endColumn-1);
}
break;
case RANGEIN_START:
@ -630,7 +641,7 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC
jj_la1_init_0();
}
private static void jj_la1_init_0() {
jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x763c00,0x200,0x100,0x10000,0x762000,0x440000,0x80000,0x80000,0x10000,0x6000000,0x800000,0x6000000,0x10000,0x60000000,0x8000000,0x60000000,0x10000,0x80000,0x10000,0x760000,};
jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0xf63c00,0x200,0x100,0x10000,0xf62000,0x940000,0x80000,0x80000,0x10000,0xc000000,0x1000000,0xc000000,0x10000,0xc0000000,0x10000000,0xc0000000,0x10000,0x80000,0x10000,0xf60000,};
}
final private JJCalls[] jj_2_rtns = new JJCalls[1];
private boolean jj_rescan = false;
@ -816,7 +827,7 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC
/** Generate ParseException. */
public ParseException generateParseException() {
jj_expentries.clear();
boolean[] la1tokens = new boolean[31];
boolean[] la1tokens = new boolean[32];
if (jj_kind >= 0) {
la1tokens[jj_kind] = true;
jj_kind = -1;
@ -830,7 +841,7 @@ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserC
}
}
}
for (int i = 0; i < 31; i++) {
for (int i = 0; i < 32; i++) {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;

View File

@ -52,6 +52,7 @@ import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode;
import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryParser.core.nodes.SlopQueryNode;
import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
@ -132,6 +133,7 @@ PARSER_END(StandardSyntaxParser)
| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}
@ -374,6 +376,7 @@ QueryNode Clause(CharSequence field) : {
QueryNode Term(CharSequence field) : {
Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean fuzzy = false;
boolean regexp = false;
QueryNode q =null;
ParametricQueryNode qLower, qUpper;
float defaultMinSimilarity = 0.5f;
@ -382,6 +385,7 @@ QueryNode Term(CharSequence field) : {
(
(
term=<TERM> { q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); }
| term=<REGEXPTERM> { regexp=true; }
| term=<NUMBER>
)
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
@ -396,6 +400,8 @@ QueryNode Term(CharSequence field) : {
throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));
}
q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
} else if (regexp) {
q = new RegexpQueryNode(field, term.image, term.beginColumn, term.endColumn-1);
}
}
| ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )

View File

@ -47,27 +47,29 @@ public interface StandardSyntaxParserConstants {
/** RegularExpression Id. */
int FUZZY_SLOP = 19;
/** RegularExpression Id. */
int RANGEIN_START = 20;
int REGEXPTERM = 20;
/** RegularExpression Id. */
int RANGEEX_START = 21;
int RANGEIN_START = 21;
/** RegularExpression Id. */
int NUMBER = 22;
int RANGEEX_START = 22;
/** RegularExpression Id. */
int RANGEIN_TO = 23;
int NUMBER = 23;
/** RegularExpression Id. */
int RANGEIN_END = 24;
int RANGEIN_TO = 24;
/** RegularExpression Id. */
int RANGEIN_QUOTED = 25;
int RANGEIN_END = 25;
/** RegularExpression Id. */
int RANGEIN_GOOP = 26;
int RANGEIN_QUOTED = 26;
/** RegularExpression Id. */
int RANGEEX_TO = 27;
int RANGEIN_GOOP = 27;
/** RegularExpression Id. */
int RANGEEX_END = 28;
int RANGEEX_TO = 28;
/** RegularExpression Id. */
int RANGEEX_QUOTED = 29;
int RANGEEX_END = 29;
/** RegularExpression Id. */
int RANGEEX_GOOP = 30;
int RANGEEX_QUOTED = 30;
/** RegularExpression Id. */
int RANGEEX_GOOP = 31;
/** Lexical state. */
int Boost = 0;
@ -100,6 +102,7 @@ public interface StandardSyntaxParserConstants {
"<QUOTED>",
"<TERM>",
"<FUZZY_SLOP>",
"<REGEXPTERM>",
"\"[\"",
"\"{\"",
"<NUMBER>",

View File

@ -38,6 +38,7 @@ import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode;
import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryParser.core.nodes.SlopQueryNode;
import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
@ -86,11 +87,11 @@ private int jjMoveStringLiteralDfa0_3()
case 58:
return jjStopAtPos(0, 15);
case 91:
return jjStopAtPos(0, 20);
return jjStopAtPos(0, 21);
case 94:
return jjStopAtPos(0, 16);
case 123:
return jjStopAtPos(0, 21);
return jjStopAtPos(0, 22);
default :
return jjMoveNfa_3(0, 0);
}
@ -110,7 +111,7 @@ static final long[] jjbitVec4 = {
private int jjMoveNfa_3(int startState, int curPos)
{
int startsAt = 0;
jjnewStateCnt = 28;
jjnewStateCnt = 33;
int i = 1;
jjstateSet[0] = startState;
int kind = 0x7fffffff;
@ -144,7 +145,9 @@ private int jjMoveNfa_3(int startState, int curPos)
if (kind > 10)
kind = 10;
}
if (curChar == 38)
if (curChar == 47)
jjCheckNAddStates(3, 5);
else if (curChar == 38)
jjstateSet[jjnewStateCnt++] = 4;
break;
case 4:
@ -198,7 +201,7 @@ private int jjMoveNfa_3(int startState, int curPos)
break;
if (kind > 19)
kind = 19;
jjAddStates(3, 4);
jjAddStates(6, 7);
break;
case 26:
if (curChar == 46)
@ -211,6 +214,19 @@ private int jjMoveNfa_3(int startState, int curPos)
kind = 19;
jjCheckNAdd(27);
break;
case 28:
case 30:
if (curChar == 47)
jjCheckNAddStates(3, 5);
break;
case 29:
if ((0xffff7fffffffffffL & l) != 0L)
jjCheckNAddStates(3, 5);
break;
case 32:
if (curChar == 47 && kind > 20)
kind = 20;
break;
default : break;
}
} while(i != startsAt);
@ -325,6 +341,13 @@ private int jjMoveNfa_3(int startState, int curPos)
kind = 19;
jjstateSet[jjnewStateCnt++] = 25;
break;
case 29:
jjAddStates(3, 5);
break;
case 31:
if (curChar == 92)
jjstateSet[jjnewStateCnt++] = 30;
break;
default : break;
}
} while(i != startsAt);
@ -373,6 +396,10 @@ private int jjMoveNfa_3(int startState, int curPos)
kind = 18;
jjCheckNAddTwoStates(20, 21);
break;
case 29:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
jjAddStates(3, 5);
break;
default : break;
}
} while(i != startsAt);
@ -384,7 +411,7 @@ private int jjMoveNfa_3(int startState, int curPos)
kind = 0x7fffffff;
}
++curPos;
if ((i = jjnewStateCnt) == (startsAt = 28 - (jjnewStateCnt = startsAt)))
if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt)))
return curPos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return curPos; }
@ -395,9 +422,9 @@ private final int jjStopStringLiteralDfa_1(int pos, long active0)
switch (pos)
{
case 0:
if ((active0 & 0x8000000L) != 0L)
if ((active0 & 0x10000000L) != 0L)
{
jjmatchedKind = 30;
jjmatchedKind = 31;
return 6;
}
return -1;
@ -414,9 +441,9 @@ private int jjMoveStringLiteralDfa0_1()
switch(curChar)
{
case 84:
return jjMoveStringLiteralDfa1_1(0x8000000L);
return jjMoveStringLiteralDfa1_1(0x10000000L);
case 125:
return jjStopAtPos(0, 28);
return jjStopAtPos(0, 29);
default :
return jjMoveNfa_1(0, 0);
}
@ -431,8 +458,8 @@ private int jjMoveStringLiteralDfa1_1(long active0)
switch(curChar)
{
case 79:
if ((active0 & 0x8000000L) != 0L)
return jjStartNfaWithStates_1(1, 27, 6);
if ((active0 & 0x10000000L) != 0L)
return jjStartNfaWithStates_1(1, 28, 6);
break;
default :
break;
@ -468,8 +495,8 @@ private int jjMoveNfa_1(int startState, int curPos)
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
if (kind > 30)
kind = 30;
if (kind > 31)
kind = 31;
jjCheckNAdd(6);
}
if ((0x100002600L & l) != 0L)
@ -486,21 +513,21 @@ private int jjMoveNfa_1(int startState, int curPos)
break;
case 2:
if ((0xfffffffbffffffffL & l) != 0L)
jjCheckNAddStates(5, 7);
jjCheckNAddStates(8, 10);
break;
case 3:
if (curChar == 34)
jjCheckNAddStates(5, 7);
jjCheckNAddStates(8, 10);
break;
case 5:
if (curChar == 34 && kind > 29)
kind = 29;
if (curChar == 34 && kind > 30)
kind = 30;
break;
case 6:
if ((0xfffffffeffffffffL & l) == 0L)
break;
if (kind > 30)
kind = 30;
if (kind > 31)
kind = 31;
jjCheckNAdd(6);
break;
default : break;
@ -518,12 +545,12 @@ private int jjMoveNfa_1(int startState, int curPos)
case 6:
if ((0xdfffffffffffffffL & l) == 0L)
break;
if (kind > 30)
kind = 30;
if (kind > 31)
kind = 31;
jjCheckNAdd(6);
break;
case 2:
jjAddStates(5, 7);
jjAddStates(8, 10);
break;
case 4:
if (curChar == 92)
@ -552,20 +579,20 @@ private int jjMoveNfa_1(int startState, int curPos)
}
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
{
if (kind > 30)
kind = 30;
if (kind > 31)
kind = 31;
jjCheckNAdd(6);
}
break;
case 2:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
jjAddStates(5, 7);
jjAddStates(8, 10);
break;
case 6:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break;
if (kind > 30)
kind = 30;
if (kind > 31)
kind = 31;
jjCheckNAdd(6);
break;
default : break;
@ -610,9 +637,9 @@ private int jjMoveNfa_0(int startState, int curPos)
case 0:
if ((0x3ff000000000000L & l) == 0L)
break;
if (kind > 22)
kind = 22;
jjAddStates(8, 9);
if (kind > 23)
kind = 23;
jjAddStates(11, 12);
break;
case 1:
if (curChar == 46)
@ -621,8 +648,8 @@ private int jjMoveNfa_0(int startState, int curPos)
case 2:
if ((0x3ff000000000000L & l) == 0L)
break;
if (kind > 22)
kind = 22;
if (kind > 23)
kind = 23;
jjCheckNAdd(2);
break;
default : break;
@ -673,9 +700,9 @@ private final int jjStopStringLiteralDfa_2(int pos, long active0)
switch (pos)
{
case 0:
if ((active0 & 0x800000L) != 0L)
if ((active0 & 0x1000000L) != 0L)
{
jjmatchedKind = 26;
jjmatchedKind = 27;
return 6;
}
return -1;
@ -692,9 +719,9 @@ private int jjMoveStringLiteralDfa0_2()
switch(curChar)
{
case 84:
return jjMoveStringLiteralDfa1_2(0x800000L);
return jjMoveStringLiteralDfa1_2(0x1000000L);
case 93:
return jjStopAtPos(0, 24);
return jjStopAtPos(0, 25);
default :
return jjMoveNfa_2(0, 0);
}
@ -709,8 +736,8 @@ private int jjMoveStringLiteralDfa1_2(long active0)
switch(curChar)
{
case 79:
if ((active0 & 0x800000L) != 0L)
return jjStartNfaWithStates_2(1, 23, 6);
if ((active0 & 0x1000000L) != 0L)
return jjStartNfaWithStates_2(1, 24, 6);
break;
default :
break;
@ -746,8 +773,8 @@ private int jjMoveNfa_2(int startState, int curPos)
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
if (kind > 26)
kind = 26;
if (kind > 27)
kind = 27;
jjCheckNAdd(6);
}
if ((0x100002600L & l) != 0L)
@ -764,21 +791,21 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 2:
if ((0xfffffffbffffffffL & l) != 0L)
jjCheckNAddStates(5, 7);
jjCheckNAddStates(8, 10);
break;
case 3:
if (curChar == 34)
jjCheckNAddStates(5, 7);
jjCheckNAddStates(8, 10);
break;
case 5:
if (curChar == 34 && kind > 25)
kind = 25;
if (curChar == 34 && kind > 26)
kind = 26;
break;
case 6:
if ((0xfffffffeffffffffL & l) == 0L)
break;
if (kind > 26)
kind = 26;
if (kind > 27)
kind = 27;
jjCheckNAdd(6);
break;
default : break;
@ -796,12 +823,12 @@ private int jjMoveNfa_2(int startState, int curPos)
case 6:
if ((0xffffffffdfffffffL & l) == 0L)
break;
if (kind > 26)
kind = 26;
if (kind > 27)
kind = 27;
jjCheckNAdd(6);
break;
case 2:
jjAddStates(5, 7);
jjAddStates(8, 10);
break;
case 4:
if (curChar == 92)
@ -830,20 +857,20 @@ private int jjMoveNfa_2(int startState, int curPos)
}
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
{
if (kind > 26)
kind = 26;
if (kind > 27)
kind = 27;
jjCheckNAdd(6);
}
break;
case 2:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
jjAddStates(5, 7);
jjAddStates(8, 10);
break;
case 6:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break;
if (kind > 26)
kind = 26;
if (kind > 27)
kind = 27;
jjCheckNAdd(6);
break;
default : break;
@ -864,7 +891,7 @@ private int jjMoveNfa_2(int startState, int curPos)
}
}
static final int[] jjnextStates = {
15, 16, 18, 25, 26, 2, 4, 5, 0, 1,
15, 16, 18, 29, 31, 32, 25, 26, 2, 4, 5, 0, 1,
};
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
{
@ -906,8 +933,8 @@ private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, lo
/** Token literal values. */
public static final String[] jjstrLiteralImages = {
"", null, null, null, null, null, null, null, null, null, null, "\53", "\55",
"\50", "\51", "\72", "\136", null, null, null, "\133", "\173", null, "\124\117",
"\135", null, null, "\124\117", "\175", null, null, };
"\50", "\51", "\72", "\136", null, null, null, null, "\133", "\173", null,
"\124\117", "\135", null, null, "\124\117", "\175", null, null, };
/** Lexer state names. */
public static final String[] lexStateNames = {
@ -919,18 +946,18 @@ public static final String[] lexStateNames = {
/** Lex State array. */
public static final int[] jjnewLexState = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, 2, 1, 3, -1, 3,
-1, -1, -1, 3, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, 2, 1, 3, -1,
3, -1, -1, -1, 3, -1, -1,
};
static final long[] jjtoToken = {
0x7fffff01L,
0xffffff01L,
};
static final long[] jjtoSkip = {
0x80L,
};
protected JavaCharStream input_stream;
private final int[] jjrounds = new int[28];
private final int[] jjstateSet = new int[56];
private final int[] jjrounds = new int[33];
private final int[] jjstateSet = new int[66];
protected char curChar;
/** Constructor. */
public StandardSyntaxParserTokenManager(JavaCharStream stream){
@ -957,7 +984,7 @@ private void ReInitRounds()
{
int i;
jjround = 0x80000001;
for (i = 28; i-- > 0;)
for (i = 33; i-- > 0;)
jjrounds[i] = 0x80000000;
}

View File

@ -121,4 +121,4 @@ public class Token {
}
}
/* JavaCC - OriginalChecksum=0aac6816ecd328eda2f38b9d09739ab6 (do not edit this line) */
/* JavaCC - OriginalChecksum=cecb6022e0f2e2fca751015375f6d319 (do not edit this line) */

View File

@ -138,4 +138,4 @@ public class TokenMgrError extends Error
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
/* JavaCC - OriginalChecksum=a75b5b61664a73631a032a6e44f4b38a (do not edit this line) */
/* JavaCC - OriginalChecksum=0e9c5fad06efef4f41f97b851ac7b0ce (do not edit this line) */

View File

@ -25,9 +25,11 @@ import org.apache.lucene.queryParser.core.nodes.FieldQueryNode;
import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.queryParser.core.nodes.TextableQueryNode;
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryParser.core.util.UnescapedCharSequence;
import org.apache.lucene.queryParser.standard.config.LowercaseExpandedTermsAttribute;
import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;
/**
@ -70,10 +72,10 @@ public class LowercaseExpandedTermsQueryNodeProcessor extends
protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
if (node instanceof WildcardQueryNode || node instanceof FuzzyQueryNode
|| node instanceof ParametricQueryNode) {
|| node instanceof ParametricQueryNode || node instanceof RegexpQueryNode) {
FieldQueryNode fieldNode = (FieldQueryNode) node;
fieldNode.setText(UnescapedCharSequence.toLowerCase(fieldNode.getText()));
TextableQueryNode txtNode = (TextableQueryNode) node;
txtNode.setText(UnescapedCharSequence.toLowerCase(txtNode.getText()));
}
return node;

View File

@ -23,6 +23,7 @@ import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryParser.standard.config.MultiTermRewriteMethodAttribute;
import org.apache.lucene.queryParser.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryParser.standard.nodes.WildcardQueryNode;
import org.apache.lucene.search.MultiTermQuery;
@ -40,7 +41,7 @@ public class MultiTermRewriteMethodProcessor extends QueryNodeProcessorImpl {
// set setMultiTermRewriteMethod for WildcardQueryNode,
// PrefixWildcardQueryNode, ParametricRangeQueryNode and RegexpQueryNode
if (node instanceof WildcardQueryNode
|| node instanceof ParametricRangeQueryNode) {
|| node instanceof ParametricRangeQueryNode || node instanceof RegexpQueryNode) {
if (!getQueryConfigHandler().hasAttribute(
MultiTermRewriteMethodAttribute.class)) {

View File

@ -28,15 +28,19 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.LocalizedTestCase;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
@ -691,6 +695,35 @@ public class TestPrecedenceQueryParser extends LocalizedTestCase {
assertEquals(query1, query2);
}
public void testRegexps() throws Exception {
  final PrecedenceQueryParser parser =
      getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false));

  // A slash-delimited pattern must come back as a RegexpQuery.
  final RegexpQuery expected = new RegexpQuery(new Term("field", "[a-z][123]"));
  assertEquals(expected, parser.parse("/[a-z][123]/"));

  // With lower-casing of expanded terms enabled, the upper-case pattern
  // must produce the same (lower-case) query.
  parser.setLowercaseExpandedTerms(true);
  assertEquals(expected, parser.parse("/[A-Z][123]/"));

  // A boost suffix is applied to the regexp query.
  final String boostedPattern = "/[A-Z][123]/^0.5";
  expected.setBoost(0.5f);
  assertEquals(expected, parser.parse(boostedPattern));

  // The parser's multi-term rewrite method is handed on to the query.
  parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
  expected.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
  assertTrue(parser.parse(boostedPattern) instanceof RegexpQuery);
  assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,
      ((RegexpQuery) parser.parse(boostedPattern)).getRewriteMethod());
  assertEquals(expected, parser.parse(boostedPattern));
  parser.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);

  // Escape sequences inside the pattern are kept verbatim in the term.
  final Query escapedSlash = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
  assertEquals(escapedSlash, parser.parse("/[a-z]\\/[123]/"));
  final Query escapedStar = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
  assertEquals(escapedStar, parser.parse("/[a-z]\\*[123]/"));

  // Mixed query: the expected tree nests the two AND-ed clauses inside the
  // OR, and the path-like inputs are expected as plain term queries.
  final BooleanQuery conjunction = new BooleanQuery();
  conjunction.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST);
  conjunction.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST);
  final BooleanQuery disjunction = new BooleanQuery();
  disjunction.add(conjunction, Occur.SHOULD);
  disjunction.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD);
  assertEquals(disjunction,
      parser.parse("/[a-z]\\/[123]/ AND path:/etc/init.d/ OR /etc\\/init\\[.\\]d/lucene/ "));
}
@Override
protected void tearDown() throws Exception {

View File

@ -68,10 +68,12 @@ import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LocalizedTestCase;
import org.apache.lucene.util.automaton.BasicAutomata;
@ -1156,6 +1158,34 @@ public class TestQPHelper extends LocalizedTestCase {
}
public void testRegexps() throws Exception {
  final StandardQueryParser parser = new StandardQueryParser();
  final String defaultField = "field" ;

  // A slash-delimited pattern must come back as a RegexpQuery.
  final RegexpQuery expected = new RegexpQuery(new Term("field", "[a-z][123]"));
  assertEquals(expected, parser.parse("/[a-z][123]/", defaultField));

  // With lower-casing of expanded terms enabled, the upper-case pattern
  // must produce the same (lower-case) query.
  parser.setLowercaseExpandedTerms(true);
  assertEquals(expected, parser.parse("/[A-Z][123]/", defaultField));

  // A boost suffix is applied to the regexp query.
  final String boostedPattern = "/[A-Z][123]/^0.5";
  expected.setBoost(0.5f);
  assertEquals(expected, parser.parse(boostedPattern, defaultField));

  // The parser's multi-term rewrite method is handed on to the query.
  parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
  expected.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
  assertTrue(parser.parse(boostedPattern, defaultField) instanceof RegexpQuery);
  assertEquals(expected, parser.parse(boostedPattern, defaultField));
  assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,
      ((RegexpQuery) parser.parse(boostedPattern, defaultField)).getRewriteMethod());
  parser.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);

  // Escape sequences inside the pattern are kept verbatim in the term.
  final Query escapedSlash = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
  assertEquals(escapedSlash, parser.parse("/[a-z]\\/[123]/", defaultField));
  final Query escapedStar = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
  assertEquals(escapedStar, parser.parse("/[a-z]\\*[123]/", defaultField));

  // Mixed query: one regexp clause plus two path-like inputs that are
  // expected as plain term queries, all in a single flat BooleanQuery.
  final BooleanQuery mixed = new BooleanQuery();
  mixed.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST);
  mixed.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST);
  mixed.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD);
  assertEquals(mixed,
      parser.parse("/[a-z]\\/[123]/ AND path:/etc/init.d/ OR /etc\\/init\\[.\\]d/lucene/ ", defaultField));
}
public void testStopwords() throws Exception {
StandardQueryParser qp = new StandardQueryParser();
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());

View File

@ -109,4 +109,4 @@ public interface CharStream {
void Done();
}
/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */
/* JavaCC - OriginalChecksum=a83909a2403f969f94d18375f9f143e4 (do not edit this line) */

View File

@ -195,4 +195,4 @@ public class ParseException extends Exception {
}
}
/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */
/* JavaCC - OriginalChecksum=c63b396885c4ff44d7aa48d3feae60cd (do not edit this line) */

View File

@ -29,6 +29,7 @@ import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
@ -861,6 +862,17 @@ public class QueryParser implements QueryParserConstants {
return query;
}
/**
 * Builds a new RegexpQuery instance and applies this parser's
 * multi-term rewrite method to it.
 * @param regexp Regexp term
 * @return new RegexpQuery instance
 */
protected Query newRegexpQuery(Term regexp) {
  final RegexpQuery regexpQuery = new RegexpQuery(regexp);
  regexpQuery.setRewriteMethod(multiTermRewriteMethod);
  return regexpQuery;
}
/**
* Builds a new FuzzyQuery instance
* @param term Term
@ -985,6 +997,35 @@ public class QueryParser implements QueryParserConstants {
return newWildcardQuery(t);
}
/**
 * Factory method for generating a regular expression query. Called when
 * the parser has consumed a regular expression term token.
 *<p>
 * If <code>lowercaseExpandedTerms</code> is set, the pattern is lower-cased
 * with {@link String#toLowerCase()} (i.e. using the JVM default locale)
 * before the term is built. The pattern is not passed through the
 * Analyzer, since normal Analyzers are unlikely to work properly on
 * regular expression syntax.
 *<p>
 * Extending classes can override this method to provide custom handling
 * for regular expression queries, which may be necessary because of the
 * missing analyzer calls.
 *
 * @param field Name of the field query will use.
 * @param termStr Term token that contains a regular expression
 *
 * @return Resulting {@link Query} built for the term
 * @exception ParseException throw in overridden method to disallow
 */
protected Query getRegexpQuery(String field, String termStr) throws ParseException
{
  final String pattern = lowercaseExpandedTerms ? termStr.toLowerCase() : termStr;
  return newRegexpQuery(new Term(field, pattern));
}
/**
* Factory method for generating a query (similar to
* {@link #getWildcardQuery}). Called when parser parses an input term
@ -1234,6 +1275,7 @@ public class QueryParser implements QueryParserConstants {
case TERM:
case PREFIXTERM:
case WILDTERM:
case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
@ -1285,6 +1327,7 @@ public class QueryParser implements QueryParserConstants {
case TERM:
case PREFIXTERM:
case WILDTERM:
case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
@ -1325,12 +1368,14 @@ public class QueryParser implements QueryParserConstants {
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
boolean regexp = false;
Query q;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case STAR:
case TERM:
case PREFIXTERM:
case WILDTERM:
case REGEXPTERM:
case NUMBER:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
@ -1348,6 +1393,10 @@ public class QueryParser implements QueryParserConstants {
term = jj_consume_token(WILDTERM);
wildcard=true;
break;
case REGEXPTERM:
term = jj_consume_token(REGEXPTERM);
regexp=true;
break;
case NUMBER:
term = jj_consume_token(NUMBER);
break;
@ -1390,6 +1439,8 @@ public class QueryParser implements QueryParserConstants {
q = getPrefixQuery(field,
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
} else if (regexp) {
q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1));
} else if (fuzzy) {
float fms = fuzzyMinSim;
try {
@ -1569,6 +1620,12 @@ public class QueryParser implements QueryParserConstants {
finally { jj_save(0, xla); }
}
// JavaCC-generated lookahead helper: scans for TERM followed by COLON.
// Returns true as soon as either jj_scan_token call returns true, and
// false only when both calls return false.
private boolean jj_3R_2() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
private boolean jj_3_1() {
Token xsp;
xsp = jj_scanpos;
@ -1585,12 +1642,6 @@ public class QueryParser implements QueryParserConstants {
return false;
}
// JavaCC-generated lookahead helper: scans for TERM followed by COLON.
// Returns true as soon as either jj_scan_token call returns true, and
// false only when both calls return false.
private boolean jj_3R_2() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
/** Current token. */
@ -1609,10 +1660,10 @@ public class QueryParser implements QueryParserConstants {
jj_la1_init_1();
}
private static void jj_la1_init_0() {
jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x3ed3f00,0x90000,0x20000,0x3ed2000,0x2690000,0x100000,0x100000,0x20000,0x30000000,0x4000000,0x30000000,0x20000,0x0,0x40000000,0x0,0x20000,0x100000,0x20000,0x3ed0000,};
jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x7ed3f00,0x90000,0x20000,0x7ed2000,0x4e90000,0x100000,0x100000,0x20000,0x60000000,0x8000000,0x60000000,0x20000,0x0,0x80000000,0x0,0x20000,0x100000,0x20000,0x7ed0000,};
}
private static void jj_la1_init_1() {
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3,0x0,0x3,0x0,0x0,0x0,0x0,};
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x6,0x0,0x6,0x0,0x0,0x0,0x0,};
}
final private JJCalls[] jj_2_rtns = new JJCalls[1];
private boolean jj_rescan = false;
@ -1766,7 +1817,7 @@ public class QueryParser implements QueryParserConstants {
/** Generate ParseException. */
public ParseException generateParseException() {
jj_expentries.clear();
boolean[] la1tokens = new boolean[34];
boolean[] la1tokens = new boolean[35];
if (jj_kind >= 0) {
la1tokens[jj_kind] = true;
jj_kind = -1;
@ -1783,7 +1834,7 @@ public class QueryParser implements QueryParserConstants {
}
}
}
for (int i = 0; i < 34; i++) {
for (int i = 0; i < 35; i++) {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;

View File

@ -53,6 +53,7 @@ import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
@ -885,6 +886,17 @@ public class QueryParser {
return query;
}
/**
 * Builds a new RegexpQuery instance and applies the parser's configured
 * multi-term rewrite method to it.
 * @param regexp Regexp term
 * @return new RegexpQuery instance
 */
protected Query newRegexpQuery(Term regexp) {
  RegexpQuery query = new RegexpQuery(regexp);
  // Use the same rewrite method the parser is configured with for multi-term queries.
  query.setRewriteMethod(multiTermRewriteMethod);
  return query;
}
/**
* Builds a new FuzzyQuery instance
* @param term Term
@ -1009,6 +1021,35 @@ public class QueryParser {
return newWildcardQuery(t);
}
/**
 * Factory method that produces the query for a regular expression term.
 * The generated parser calls it for a regular expression token, passing the
 * token image without its first and last character.
 *<p>
 * If <code>lowercaseExpandedTerms</code> is enabled the pattern is lower-cased
 * with {@link String#toLowerCase()} (which uses the JVM default locale)
 * before the term is created. The pattern is never passed through an
 * Analyzer, since normal Analyzers are unlikely to work properly with
 * regular expression templates.
 *<p>
 * Extending classes can override this method to provide custom handling for
 * regular expression queries, which may be necessary because no analyzer is
 * applied here.
 *
 * @param field Name of the field query will use.
 * @param termStr Term token that contains a regular expression
 *
 * @return Resulting {@link Query} built for the term
 * @exception ParseException throw in overridden method to disallow
 */
protected Query getRegexpQuery(String field, String termStr) throws ParseException
{
  // Pick the pattern text first, then build the term and delegate to the factory.
  final String pattern = lowercaseExpandedTerms ? termStr.toLowerCase() : termStr;
  return newRegexpQuery(new Term(field, pattern));
}
/**
* Factory method for generating a query (similar to
* {@link #getWildcardQuery}). Called when parser parses an input term
@ -1218,6 +1259,7 @@ PARSER_END(QueryParser)
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}
@ -1340,6 +1382,7 @@ Query Term(String field) : {
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
boolean regexp = false;
Query q;
}
{
@ -1349,6 +1392,7 @@ Query Term(String field) : {
| term=<STAR> { wildcard=true; }
| term=<PREFIXTERM> { prefix=true; }
| term=<WILDTERM> { wildcard=true; }
| term=<REGEXPTERM> { regexp=true; }
| term=<NUMBER>
)
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
@ -1361,6 +1405,8 @@ Query Term(String field) : {
q = getPrefixQuery(field,
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
} else if (regexp) {
q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1));
} else if (fuzzy) {
float fms = fuzzyMinSim;
try {

View File

@ -53,27 +53,29 @@ public interface QueryParserConstants {
/** RegularExpression Id. */
int WILDTERM = 22;
/** RegularExpression Id. */
int RANGEIN_START = 23;
int REGEXPTERM = 23;
/** RegularExpression Id. */
int RANGEEX_START = 24;
int RANGEIN_START = 24;
/** RegularExpression Id. */
int NUMBER = 25;
int RANGEEX_START = 25;
/** RegularExpression Id. */
int RANGEIN_TO = 26;
int NUMBER = 26;
/** RegularExpression Id. */
int RANGEIN_END = 27;
int RANGEIN_TO = 27;
/** RegularExpression Id. */
int RANGEIN_QUOTED = 28;
int RANGEIN_END = 28;
/** RegularExpression Id. */
int RANGEIN_GOOP = 29;
int RANGEIN_QUOTED = 29;
/** RegularExpression Id. */
int RANGEEX_TO = 30;
int RANGEIN_GOOP = 30;
/** RegularExpression Id. */
int RANGEEX_END = 31;
int RANGEEX_TO = 31;
/** RegularExpression Id. */
int RANGEEX_QUOTED = 32;
int RANGEEX_END = 32;
/** RegularExpression Id. */
int RANGEEX_GOOP = 33;
int RANGEEX_QUOTED = 33;
/** RegularExpression Id. */
int RANGEEX_GOOP = 34;
/** Lexical state. */
int Boost = 0;
@ -109,6 +111,7 @@ public interface QueryParserConstants {
"<FUZZY_SLOP>",
"<PREFIXTERM>",
"<WILDTERM>",
"<REGEXPTERM>",
"\"[\"",
"\"{\"",
"<NUMBER>",

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
@ -70,7 +71,7 @@ private int jjMoveStringLiteralDfa0_3()
case 41:
return jjStopAtPos(0, 14);
case 42:
return jjStartNfaWithStates_3(0, 16, 36);
return jjStartNfaWithStates_3(0, 16, 41);
case 43:
return jjStopAtPos(0, 11);
case 45:
@ -78,11 +79,11 @@ private int jjMoveStringLiteralDfa0_3()
case 58:
return jjStopAtPos(0, 15);
case 91:
return jjStopAtPos(0, 23);
return jjStopAtPos(0, 24);
case 94:
return jjStopAtPos(0, 17);
case 123:
return jjStopAtPos(0, 24);
return jjStopAtPos(0, 25);
default :
return jjMoveNfa_3(0, 0);
}
@ -110,7 +111,7 @@ static final long[] jjbitVec4 = {
private int jjMoveNfa_3(int startState, int curPos)
{
int startsAt = 0;
jjnewStateCnt = 36;
jjnewStateCnt = 41;
int i = 1;
jjstateSet[0] = startState;
int kind = 0x7fffffff;
@ -125,14 +126,6 @@ private int jjMoveNfa_3(int startState, int curPos)
{
switch(jjstateSet[--i])
{
case 36:
case 25:
if ((0xfbfffcf8ffffd9ffL & l) == 0L)
break;
if (kind > 22)
kind = 22;
jjCheckNAddTwoStates(25, 26);
break;
case 0:
if ((0xfbffd4f8ffffd9ffL & l) != 0L)
{
@ -163,9 +156,19 @@ private int jjMoveNfa_3(int startState, int curPos)
if (kind > 21)
kind = 21;
}
if (curChar == 38)
if (curChar == 47)
jjCheckNAddStates(8, 10);
else if (curChar == 38)
jjstateSet[jjnewStateCnt++] = 4;
break;
case 41:
case 25:
if ((0xfbfffcf8ffffd9ffL & l) == 0L)
break;
if (kind > 22)
kind = 22;
jjCheckNAddTwoStates(25, 26);
break;
case 4:
if (curChar == 38 && kind > 8)
kind = 8;
@ -198,7 +201,7 @@ private int jjMoveNfa_3(int startState, int curPos)
break;
if (kind > 20)
kind = 20;
jjAddStates(8, 9);
jjAddStates(11, 12);
break;
case 21:
if (curChar == 46)
@ -228,30 +231,43 @@ private int jjMoveNfa_3(int startState, int curPos)
jjCheckNAddTwoStates(25, 26);
break;
case 28:
case 30:
if (curChar == 47)
jjCheckNAddStates(8, 10);
break;
case 29:
if ((0xffff7fffffffffffL & l) != 0L)
jjCheckNAddStates(8, 10);
break;
case 32:
if (curChar == 47 && kind > 23)
kind = 23;
break;
case 33:
if ((0x7bffd0f8ffffd9ffL & l) == 0L)
break;
if (kind > 19)
kind = 19;
jjCheckNAddStates(3, 7);
break;
case 29:
case 34:
if ((0x7bfff8f8ffffd9ffL & l) == 0L)
break;
if (kind > 19)
kind = 19;
jjCheckNAddTwoStates(29, 30);
jjCheckNAddTwoStates(34, 35);
break;
case 31:
case 36:
if (kind > 19)
kind = 19;
jjCheckNAddTwoStates(29, 30);
jjCheckNAddTwoStates(34, 35);
break;
case 32:
case 37:
if ((0x7bfff8f8ffffd9ffL & l) != 0L)
jjCheckNAddStates(10, 12);
jjCheckNAddStates(13, 15);
break;
case 34:
jjCheckNAddStates(10, 12);
case 39:
jjCheckNAddStates(13, 15);
break;
default : break;
}
@ -264,16 +280,6 @@ private int jjMoveNfa_3(int startState, int curPos)
{
switch(jjstateSet[--i])
{
case 36:
if ((0x97ffffff87ffffffL & l) != 0L)
{
if (kind > 22)
kind = 22;
jjCheckNAddTwoStates(25, 26);
}
else if (curChar == 92)
jjCheckNAddTwoStates(27, 27);
break;
case 0:
if ((0x97ffffff87ffffffL & l) != 0L)
{
@ -282,7 +288,7 @@ private int jjMoveNfa_3(int startState, int curPos)
jjCheckNAddStates(3, 7);
}
else if (curChar == 92)
jjCheckNAddStates(13, 15);
jjCheckNAddStates(16, 18);
else if (curChar == 126)
{
if (kind > 20)
@ -304,6 +310,16 @@ private int jjMoveNfa_3(int startState, int curPos)
else if (curChar == 65)
jjstateSet[jjnewStateCnt++] = 2;
break;
case 41:
if ((0x97ffffff87ffffffL & l) != 0L)
{
if (kind > 22)
kind = 22;
jjCheckNAddTwoStates(25, 26);
}
else if (curChar == 92)
jjCheckNAddTwoStates(27, 27);
break;
case 1:
if (curChar == 68 && kind > 8)
kind = 8;
@ -385,44 +401,51 @@ private int jjMoveNfa_3(int startState, int curPos)
kind = 22;
jjCheckNAddTwoStates(25, 26);
break;
case 28:
case 29:
jjAddStates(8, 10);
break;
case 31:
if (curChar == 92)
jjstateSet[jjnewStateCnt++] = 30;
break;
case 33:
if ((0x97ffffff87ffffffL & l) == 0L)
break;
if (kind > 19)
kind = 19;
jjCheckNAddStates(3, 7);
break;
case 29:
case 34:
if ((0x97ffffff87ffffffL & l) == 0L)
break;
if (kind > 19)
kind = 19;
jjCheckNAddTwoStates(29, 30);
break;
case 30:
if (curChar == 92)
jjCheckNAddTwoStates(31, 31);
break;
case 31:
if (kind > 19)
kind = 19;
jjCheckNAddTwoStates(29, 30);
break;
case 32:
if ((0x97ffffff87ffffffL & l) != 0L)
jjCheckNAddStates(10, 12);
break;
case 33:
if (curChar == 92)
jjCheckNAddTwoStates(34, 34);
break;
case 34:
jjCheckNAddStates(10, 12);
jjCheckNAddTwoStates(34, 35);
break;
case 35:
if (curChar == 92)
jjCheckNAddTwoStates(36, 36);
break;
case 36:
if (kind > 19)
kind = 19;
jjCheckNAddTwoStates(34, 35);
break;
case 37:
if ((0x97ffffff87ffffffL & l) != 0L)
jjCheckNAddStates(13, 15);
break;
case 38:
if (curChar == 92)
jjCheckNAddTwoStates(39, 39);
break;
case 39:
jjCheckNAddStates(13, 15);
break;
case 40:
if (curChar == 92)
jjCheckNAddStates(16, 18);
break;
default : break;
}
} while(i != startsAt);
@ -438,14 +461,6 @@ private int jjMoveNfa_3(int startState, int curPos)
{
switch(jjstateSet[--i])
{
case 36:
case 25:
if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
break;
if (kind > 22)
kind = 22;
jjCheckNAddTwoStates(25, 26);
break;
case 0:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
{
@ -465,6 +480,14 @@ private int jjMoveNfa_3(int startState, int curPos)
jjCheckNAddStates(3, 7);
}
break;
case 41:
case 25:
if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
break;
if (kind > 22)
kind = 22;
jjCheckNAddTwoStates(25, 26);
break;
case 15:
case 17:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
@ -484,34 +507,38 @@ private int jjMoveNfa_3(int startState, int curPos)
kind = 22;
jjCheckNAddTwoStates(25, 26);
break;
case 28:
case 29:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
jjAddStates(8, 10);
break;
case 33:
if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
break;
if (kind > 19)
kind = 19;
jjCheckNAddStates(3, 7);
break;
case 29:
case 34:
if (!jjCanMove_2(hiByte, i1, i2, l1, l2))
break;
if (kind > 19)
kind = 19;
jjCheckNAddTwoStates(29, 30);
jjCheckNAddTwoStates(34, 35);
break;
case 31:
case 36:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break;
if (kind > 19)
kind = 19;
jjCheckNAddTwoStates(29, 30);
jjCheckNAddTwoStates(34, 35);
break;
case 32:
case 37:
if (jjCanMove_2(hiByte, i1, i2, l1, l2))
jjCheckNAddStates(10, 12);
jjCheckNAddStates(13, 15);
break;
case 34:
case 39:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
jjCheckNAddStates(10, 12);
jjCheckNAddStates(13, 15);
break;
default : break;
}
@ -524,7 +551,7 @@ private int jjMoveNfa_3(int startState, int curPos)
kind = 0x7fffffff;
}
++curPos;
if ((i = jjnewStateCnt) == (startsAt = 36 - (jjnewStateCnt = startsAt)))
if ((i = jjnewStateCnt) == (startsAt = 41 - (jjnewStateCnt = startsAt)))
return curPos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return curPos; }
@ -535,9 +562,9 @@ private final int jjStopStringLiteralDfa_1(int pos, long active0)
switch (pos)
{
case 0:
if ((active0 & 0x40000000L) != 0L)
if ((active0 & 0x80000000L) != 0L)
{
jjmatchedKind = 33;
jjmatchedKind = 34;
return 6;
}
return -1;
@ -554,9 +581,9 @@ private int jjMoveStringLiteralDfa0_1()
switch(curChar)
{
case 84:
return jjMoveStringLiteralDfa1_1(0x40000000L);
return jjMoveStringLiteralDfa1_1(0x80000000L);
case 125:
return jjStopAtPos(0, 31);
return jjStopAtPos(0, 32);
default :
return jjMoveNfa_1(0, 0);
}
@ -571,8 +598,8 @@ private int jjMoveStringLiteralDfa1_1(long active0)
switch(curChar)
{
case 79:
if ((active0 & 0x40000000L) != 0L)
return jjStartNfaWithStates_1(1, 30, 6);
if ((active0 & 0x80000000L) != 0L)
return jjStartNfaWithStates_1(1, 31, 6);
break;
default :
break;
@ -608,8 +635,8 @@ private int jjMoveNfa_1(int startState, int curPos)
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
if (kind > 33)
kind = 33;
if (kind > 34)
kind = 34;
jjCheckNAdd(6);
}
if ((0x100002600L & l) != 0L)
@ -626,21 +653,21 @@ private int jjMoveNfa_1(int startState, int curPos)
break;
case 2:
if ((0xfffffffbffffffffL & l) != 0L)
jjCheckNAddStates(16, 18);
jjCheckNAddStates(19, 21);
break;
case 3:
if (curChar == 34)
jjCheckNAddStates(16, 18);
jjCheckNAddStates(19, 21);
break;
case 5:
if (curChar == 34 && kind > 32)
kind = 32;
if (curChar == 34 && kind > 33)
kind = 33;
break;
case 6:
if ((0xfffffffeffffffffL & l) == 0L)
break;
if (kind > 33)
kind = 33;
if (kind > 34)
kind = 34;
jjCheckNAdd(6);
break;
default : break;
@ -658,12 +685,12 @@ private int jjMoveNfa_1(int startState, int curPos)
case 6:
if ((0xdfffffffffffffffL & l) == 0L)
break;
if (kind > 33)
kind = 33;
if (kind > 34)
kind = 34;
jjCheckNAdd(6);
break;
case 2:
jjAddStates(16, 18);
jjAddStates(19, 21);
break;
case 4:
if (curChar == 92)
@ -692,20 +719,20 @@ private int jjMoveNfa_1(int startState, int curPos)
}
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
{
if (kind > 33)
kind = 33;
if (kind > 34)
kind = 34;
jjCheckNAdd(6);
}
break;
case 2:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
jjAddStates(16, 18);
jjAddStates(19, 21);
break;
case 6:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break;
if (kind > 33)
kind = 33;
if (kind > 34)
kind = 34;
jjCheckNAdd(6);
break;
default : break;
@ -750,9 +777,9 @@ private int jjMoveNfa_0(int startState, int curPos)
case 0:
if ((0x3ff000000000000L & l) == 0L)
break;
if (kind > 25)
kind = 25;
jjAddStates(19, 20);
if (kind > 26)
kind = 26;
jjAddStates(22, 23);
break;
case 1:
if (curChar == 46)
@ -761,8 +788,8 @@ private int jjMoveNfa_0(int startState, int curPos)
case 2:
if ((0x3ff000000000000L & l) == 0L)
break;
if (kind > 25)
kind = 25;
if (kind > 26)
kind = 26;
jjCheckNAdd(2);
break;
default : break;
@ -813,9 +840,9 @@ private final int jjStopStringLiteralDfa_2(int pos, long active0)
switch (pos)
{
case 0:
if ((active0 & 0x4000000L) != 0L)
if ((active0 & 0x8000000L) != 0L)
{
jjmatchedKind = 29;
jjmatchedKind = 30;
return 6;
}
return -1;
@ -832,9 +859,9 @@ private int jjMoveStringLiteralDfa0_2()
switch(curChar)
{
case 84:
return jjMoveStringLiteralDfa1_2(0x4000000L);
return jjMoveStringLiteralDfa1_2(0x8000000L);
case 93:
return jjStopAtPos(0, 27);
return jjStopAtPos(0, 28);
default :
return jjMoveNfa_2(0, 0);
}
@ -849,8 +876,8 @@ private int jjMoveStringLiteralDfa1_2(long active0)
switch(curChar)
{
case 79:
if ((active0 & 0x4000000L) != 0L)
return jjStartNfaWithStates_2(1, 26, 6);
if ((active0 & 0x8000000L) != 0L)
return jjStartNfaWithStates_2(1, 27, 6);
break;
default :
break;
@ -886,8 +913,8 @@ private int jjMoveNfa_2(int startState, int curPos)
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
if (kind > 29)
kind = 29;
if (kind > 30)
kind = 30;
jjCheckNAdd(6);
}
if ((0x100002600L & l) != 0L)
@ -904,21 +931,21 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 2:
if ((0xfffffffbffffffffL & l) != 0L)
jjCheckNAddStates(16, 18);
jjCheckNAddStates(19, 21);
break;
case 3:
if (curChar == 34)
jjCheckNAddStates(16, 18);
jjCheckNAddStates(19, 21);
break;
case 5:
if (curChar == 34 && kind > 28)
kind = 28;
if (curChar == 34 && kind > 29)
kind = 29;
break;
case 6:
if ((0xfffffffeffffffffL & l) == 0L)
break;
if (kind > 29)
kind = 29;
if (kind > 30)
kind = 30;
jjCheckNAdd(6);
break;
default : break;
@ -936,12 +963,12 @@ private int jjMoveNfa_2(int startState, int curPos)
case 6:
if ((0xffffffffdfffffffL & l) == 0L)
break;
if (kind > 29)
kind = 29;
if (kind > 30)
kind = 30;
jjCheckNAdd(6);
break;
case 2:
jjAddStates(16, 18);
jjAddStates(19, 21);
break;
case 4:
if (curChar == 92)
@ -970,20 +997,20 @@ private int jjMoveNfa_2(int startState, int curPos)
}
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
{
if (kind > 29)
kind = 29;
if (kind > 30)
kind = 30;
jjCheckNAdd(6);
}
break;
case 2:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
jjAddStates(16, 18);
jjAddStates(19, 21);
break;
case 6:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break;
if (kind > 29)
kind = 29;
if (kind > 30)
kind = 30;
jjCheckNAdd(6);
break;
default : break;
@ -1004,8 +1031,8 @@ private int jjMoveNfa_2(int startState, int curPos)
}
}
static final int[] jjnextStates = {
15, 16, 18, 29, 32, 23, 33, 30, 20, 21, 32, 23, 33, 31, 34, 27,
2, 4, 5, 0, 1,
15, 16, 18, 34, 37, 23, 38, 35, 29, 31, 32, 20, 21, 37, 23, 38,
36, 39, 27, 2, 4, 5, 0, 1,
};
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
{
@ -1047,8 +1074,8 @@ private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, lo
/** Token literal values. */
public static final String[] jjstrLiteralImages = {
"", null, null, null, null, null, null, null, null, null, null, "\53", "\55",
"\50", "\51", "\72", "\52", "\136", null, null, null, null, null, "\133", "\173",
null, "\124\117", "\135", null, null, "\124\117", "\175", null, null, };
"\50", "\51", "\72", "\52", "\136", null, null, null, null, null, null, "\133",
"\173", null, "\124\117", "\135", null, null, "\124\117", "\175", null, null, };
/** Lexer state names. */
public static final String[] lexStateNames = {
@ -1060,18 +1087,18 @@ public static final String[] lexStateNames = {
/** Lex State array. */
public static final int[] jjnewLexState = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1,
3, -1, 3, -1, -1, -1, 3, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, 2,
1, 3, -1, 3, -1, -1, -1, 3, -1, -1,
};
static final long[] jjtoToken = {
0x3ffffff01L,
0x7ffffff01L,
};
static final long[] jjtoSkip = {
0x80L,
};
protected CharStream input_stream;
private final int[] jjrounds = new int[36];
private final int[] jjstateSet = new int[72];
private final int[] jjrounds = new int[41];
private final int[] jjstateSet = new int[82];
protected char curChar;
/** Constructor. */
public QueryParserTokenManager(CharStream stream){
@ -1096,7 +1123,7 @@ private void ReInitRounds()
{
int i;
jjround = 0x80000001;
for (i = 36; i-- > 0;)
for (i = 41; i-- > 0;)
jjrounds[i] = 0x80000000;
}

View File

@ -121,4 +121,4 @@ public class Token {
}
}
/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */
/* JavaCC - OriginalChecksum=37b1923f964a5a434f5ea3d6952ff200 (do not edit this line) */

View File

@ -138,4 +138,4 @@ public class TokenMgrError extends Error
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
/* JavaCC - OriginalChecksum=1c94e13236c7e0121e49427992341ee3 (do not edit this line) */
/* JavaCC - OriginalChecksum=334e679cf1a88b3070bb8e3d80ee3f5e (do not edit this line) */

View File

@ -54,10 +54,12 @@ import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LocalizedTestCase;
import org.apache.lucene.util.automaton.BasicAutomata;
@ -1054,6 +1056,33 @@ public class TestQueryParser extends LocalizedTestCase {
}
/**
 * Checks that slash-delimited input is parsed into a RegexpQuery, and that
 * lower-casing, boosts, the configured rewrite method and escaped characters
 * are carried over as expected.
 */
public void testRegexps() throws Exception {
  final QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false));
  final RegexpQuery expected = new RegexpQuery(new Term("field", "[a-z][123]"));

  // Plain regular expression between slashes.
  assertEquals(expected, parser.parse("/[a-z][123]/"));

  // With lower-casing of expanded terms enabled, the upper-case pattern must
  // yield the same lower-case query.
  parser.setLowercaseExpandedTerms(true);
  assertEquals(expected, parser.parse("/[A-Z][123]/"));

  // A boost suffix is applied to the regexp query.
  final String boostedInput = "/[A-Z][123]/^0.5";
  expected.setBoost(0.5f);
  assertEquals(expected, parser.parse(boostedInput));

  // The parser's multi-term rewrite method must show up on the parsed query.
  parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
  expected.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
  final Query firstParse = parser.parse(boostedInput);
  assertTrue(firstParse instanceof RegexpQuery);
  final RegexpQuery secondParse = (RegexpQuery) parser.parse(boostedInput);
  assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, secondParse.getRewriteMethod());
  assertEquals(expected, parser.parse(boostedInput));
  parser.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);

  // Escaped characters inside the pattern are expected to stay in the term text.
  final Query escapedSlash = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
  assertEquals(escapedSlash, parser.parse("/[a-z]\\/[123]/"));
  final Query escapedStar = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
  assertEquals(escapedStar, parser.parse("/[a-z]\\*[123]/"));

  // Mixed query: one regexp clause plus two slash-containing inputs that are
  // expected to come out as ordinary TermQuery clauses.
  final BooleanQuery mixed = new BooleanQuery();
  mixed.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST);
  mixed.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST);
  mixed.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD);
  assertEquals(mixed, parser.parse("/[a-z]\\/[123]/ AND path:/etc/init.d/ OR /etc\\/init\\[.\\]d/lucene/ "));
}
public void testStopwords() throws Exception {
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true));