mirror of https://github.com/apache/lucene.git
QueryParser can now handle minimumSimilarity parameter
of FuzzyQuery; FuzzyQuery extended to allow for non-fuzzy prefixes. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150506 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
82fc8e874f
commit
7e0155537a
|
@ -73,6 +73,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
Analyzer analyzer;
|
||||
String field;
|
||||
int phraseSlop = 0;
|
||||
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
|
||||
Locale locale = Locale.getDefault();
|
||||
|
||||
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
|
||||
|
@ -129,6 +130,19 @@ public class QueryParser implements QueryParserConstants {
|
|||
return field;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the default minimal similarity for fuzzy queries.
|
||||
*/
|
||||
public float getFuzzyMinSim() {
|
||||
return fuzzyMinSim;
|
||||
}
|
||||
/**
|
||||
*Set the default minimum similarity for fuzzy queries.
|
||||
*/
|
||||
public void setFuzzyMinSim(float fuzzyMinSim) {
|
||||
this.fuzzyMinSim = fuzzyMinSim;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the default slop for phrases. If zero, then exact phrase matches
|
||||
* are required. Default value is zero.
|
||||
|
@ -416,10 +430,10 @@ public class QueryParser implements QueryParserConstants {
|
|||
* @return Resulting {@link Query} built for the term
|
||||
* @exception ParseException throw in overridden method to disallow
|
||||
*/
|
||||
protected Query getFuzzyQuery(String field, String termStr) throws ParseException
|
||||
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
|
||||
{
|
||||
Term t = new Term(field, termStr);
|
||||
return new FuzzyQuery(t);
|
||||
return new FuzzyQuery(t, minSimilarity);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -622,7 +636,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
}
|
||||
|
||||
final public Query Term(String field) throws ParseException {
|
||||
Token term, boost=null, slop=null, goop1, goop2;
|
||||
Token term, boost=null, fuzzySlop=null, goop1, goop2;
|
||||
boolean prefix = false;
|
||||
boolean wildcard = false;
|
||||
boolean fuzzy = false;
|
||||
|
@ -654,9 +668,9 @@ public class QueryParser implements QueryParserConstants {
|
|||
throw new ParseException();
|
||||
}
|
||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||
case FUZZY:
|
||||
jj_consume_token(FUZZY);
|
||||
fuzzy=true;
|
||||
case FUZZY_SLOP:
|
||||
fuzzySlop = jj_consume_token(FUZZY_SLOP);
|
||||
fuzzy=true;
|
||||
break;
|
||||
default:
|
||||
jj_la1[8] = jj_gen;
|
||||
|
@ -667,9 +681,9 @@ public class QueryParser implements QueryParserConstants {
|
|||
jj_consume_token(CARAT);
|
||||
boost = jj_consume_token(NUMBER);
|
||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||
case FUZZY:
|
||||
jj_consume_token(FUZZY);
|
||||
fuzzy=true;
|
||||
case FUZZY_SLOP:
|
||||
fuzzySlop = jj_consume_token(FUZZY_SLOP);
|
||||
fuzzy=true;
|
||||
break;
|
||||
default:
|
||||
jj_la1[9] = jj_gen;
|
||||
|
@ -688,7 +702,11 @@ public class QueryParser implements QueryParserConstants {
|
|||
discardEscapeChar(term.image.substring
|
||||
(0, term.image.length()-1)));
|
||||
} else if (fuzzy) {
|
||||
q = getFuzzyQuery(field, termImage);
|
||||
float fms = fuzzyMinSim;
|
||||
try {
|
||||
fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
|
||||
} catch (Exception ignored) { }
|
||||
q = getFuzzyQuery(field, termImage, fms);
|
||||
} else {
|
||||
q = getFieldQuery(field, termImage);
|
||||
}
|
||||
|
@ -809,8 +827,8 @@ public class QueryParser implements QueryParserConstants {
|
|||
case QUOTED:
|
||||
term = jj_consume_token(QUOTED);
|
||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||
case SLOP:
|
||||
slop = jj_consume_token(SLOP);
|
||||
case FUZZY_SLOP:
|
||||
fuzzySlop = jj_consume_token(FUZZY_SLOP);
|
||||
break;
|
||||
default:
|
||||
jj_la1[19] = jj_gen;
|
||||
|
@ -827,9 +845,9 @@ public class QueryParser implements QueryParserConstants {
|
|||
}
|
||||
int s = phraseSlop;
|
||||
|
||||
if (slop != null) {
|
||||
if (fuzzySlop != null) {
|
||||
try {
|
||||
s = Float.valueOf(slop.image.substring(1)).intValue();
|
||||
s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
|
||||
}
|
||||
catch (Exception ignored) { }
|
||||
}
|
||||
|
@ -883,16 +901,11 @@ public class QueryParser implements QueryParserConstants {
|
|||
private int jj_gen;
|
||||
final private int[] jj_la1 = new int[22];
|
||||
static private int[] jj_la1_0;
|
||||
static private int[] jj_la1_1;
|
||||
static {
|
||||
jj_la1_0();
|
||||
jj_la1_1();
|
||||
}
|
||||
private static void jj_la1_0() {
|
||||
jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0x1f31f80,0x8000,0x1f31000,0x1320000,0x40000,0x40000,0x8000,0x18000000,0x2000000,0x18000000,0x8000,0x80000000,0x20000000,0x80000000,0x8000,0x80000,0x8000,0x1f30000,};
|
||||
}
|
||||
private static void jj_la1_1() {
|
||||
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,};
|
||||
jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0xfb1f80,0x8000,0xfb1000,0x9a0000,0x40000,0x40000,0x8000,0xc000000,0x1000000,0xc000000,0x8000,0xc0000000,0x10000000,0xc0000000,0x8000,0x40000,0x8000,0xfb0000,};
|
||||
}
|
||||
final private JJCalls[] jj_2_rtns = new JJCalls[1];
|
||||
private boolean jj_rescan = false;
|
||||
|
@ -1041,8 +1054,8 @@ public class QueryParser implements QueryParserConstants {
|
|||
|
||||
public ParseException generateParseException() {
|
||||
jj_expentries.removeAllElements();
|
||||
boolean[] la1tokens = new boolean[33];
|
||||
for (int i = 0; i < 33; i++) {
|
||||
boolean[] la1tokens = new boolean[32];
|
||||
for (int i = 0; i < 32; i++) {
|
||||
la1tokens[i] = false;
|
||||
}
|
||||
if (jj_kind >= 0) {
|
||||
|
@ -1055,13 +1068,10 @@ public class QueryParser implements QueryParserConstants {
|
|||
if ((jj_la1_0[i] & (1<<j)) != 0) {
|
||||
la1tokens[j] = true;
|
||||
}
|
||||
if ((jj_la1_1[i] & (1<<j)) != 0) {
|
||||
la1tokens[32+j] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 33; i++) {
|
||||
for (int i = 0; i < 32; i++) {
|
||||
if (la1tokens[i]) {
|
||||
jj_expentry = new int[1];
|
||||
jj_expentry[0] = i;
|
||||
|
|
|
@ -96,6 +96,7 @@ public class QueryParser {
|
|||
Analyzer analyzer;
|
||||
String field;
|
||||
int phraseSlop = 0;
|
||||
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
|
||||
Locale locale = Locale.getDefault();
|
||||
|
||||
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
|
||||
|
@ -151,6 +152,19 @@ public class QueryParser {
|
|||
public String getField() {
|
||||
return field;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the default minimal similarity for fuzzy queries.
|
||||
*/
|
||||
public float getFuzzyMinSim() {
|
||||
return fuzzyMinSim;
|
||||
}
|
||||
/**
|
||||
*Set the default minimum similarity for fuzzy queries.
|
||||
*/
|
||||
public void setFuzzyMinSim(float fuzzyMinSim) {
|
||||
this.fuzzyMinSim = fuzzyMinSim;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the default slop for phrases. If zero, then exact phrase matches
|
||||
|
@ -439,10 +453,10 @@ public class QueryParser {
|
|||
* @return Resulting {@link Query} built for the term
|
||||
* @exception ParseException throw in overridden method to disallow
|
||||
*/
|
||||
protected Query getFuzzyQuery(String field, String termStr) throws ParseException
|
||||
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
|
||||
{
|
||||
Term t = new Term(field, termStr);
|
||||
return new FuzzyQuery(t);
|
||||
return new FuzzyQuery(t, minSimilarity);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -531,8 +545,7 @@ PARSER_END(QueryParser)
|
|||
| <CARAT: "^" > : Boost
|
||||
| <QUOTED: "\"" (~["\""])+ "\"">
|
||||
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
|
||||
| <FUZZY: "~" >
|
||||
| <SLOP: "~" (<_NUM_CHAR>)+ >
|
||||
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
|
||||
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
|
||||
| <WILDTERM: <_TERM_START_CHAR>
|
||||
(<_TERM_CHAR> | ( [ "*", "?" ] ))* >
|
||||
|
@ -641,7 +654,7 @@ Query Clause(String field) : {
|
|||
|
||||
|
||||
Query Term(String field) : {
|
||||
Token term, boost=null, slop=null, goop1, goop2;
|
||||
Token term, boost=null, fuzzySlop=null, goop1, goop2;
|
||||
boolean prefix = false;
|
||||
boolean wildcard = false;
|
||||
boolean fuzzy = false;
|
||||
|
@ -656,8 +669,8 @@ Query Term(String field) : {
|
|||
| term=<WILDTERM> { wildcard=true; }
|
||||
| term=<NUMBER>
|
||||
)
|
||||
[ <FUZZY> { fuzzy=true; } ]
|
||||
[ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ]
|
||||
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
|
||||
[ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
|
||||
{
|
||||
String termImage=discardEscapeChar(term.image);
|
||||
if (wildcard) {
|
||||
|
@ -667,7 +680,11 @@ Query Term(String field) : {
|
|||
discardEscapeChar(term.image.substring
|
||||
(0, term.image.length()-1)));
|
||||
} else if (fuzzy) {
|
||||
q = getFuzzyQuery(field, termImage);
|
||||
float fms = fuzzyMinSim;
|
||||
try {
|
||||
fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
|
||||
} catch (Exception ignored) { }
|
||||
q = getFuzzyQuery(field, termImage, fms);
|
||||
} else {
|
||||
q = getFieldQuery(field, termImage);
|
||||
}
|
||||
|
@ -708,14 +725,14 @@ Query Term(String field) : {
|
|||
q = getRangeQuery(field, goop1.image, goop2.image, false);
|
||||
}
|
||||
| term=<QUOTED>
|
||||
[ slop=<SLOP> ]
|
||||
[ fuzzySlop=<FUZZY_SLOP> ]
|
||||
[ <CARAT> boost=<NUMBER> ]
|
||||
{
|
||||
int s = phraseSlop;
|
||||
|
||||
if (slop != null) {
|
||||
if (fuzzySlop != null) {
|
||||
try {
|
||||
s = Float.valueOf(slop.image.substring(1)).intValue();
|
||||
s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
|
||||
}
|
||||
catch (Exception ignored) { }
|
||||
}
|
||||
|
|
|
@ -20,21 +20,20 @@ public interface QueryParserConstants {
|
|||
int CARAT = 15;
|
||||
int QUOTED = 16;
|
||||
int TERM = 17;
|
||||
int FUZZY = 18;
|
||||
int SLOP = 19;
|
||||
int PREFIXTERM = 20;
|
||||
int WILDTERM = 21;
|
||||
int RANGEIN_START = 22;
|
||||
int RANGEEX_START = 23;
|
||||
int NUMBER = 24;
|
||||
int RANGEIN_TO = 25;
|
||||
int RANGEIN_END = 26;
|
||||
int RANGEIN_QUOTED = 27;
|
||||
int RANGEIN_GOOP = 28;
|
||||
int RANGEEX_TO = 29;
|
||||
int RANGEEX_END = 30;
|
||||
int RANGEEX_QUOTED = 31;
|
||||
int RANGEEX_GOOP = 32;
|
||||
int FUZZY_SLOP = 18;
|
||||
int PREFIXTERM = 19;
|
||||
int WILDTERM = 20;
|
||||
int RANGEIN_START = 21;
|
||||
int RANGEEX_START = 22;
|
||||
int NUMBER = 23;
|
||||
int RANGEIN_TO = 24;
|
||||
int RANGEIN_END = 25;
|
||||
int RANGEIN_QUOTED = 26;
|
||||
int RANGEIN_GOOP = 27;
|
||||
int RANGEEX_TO = 28;
|
||||
int RANGEEX_END = 29;
|
||||
int RANGEEX_QUOTED = 30;
|
||||
int RANGEEX_GOOP = 31;
|
||||
|
||||
int Boost = 0;
|
||||
int RangeEx = 1;
|
||||
|
@ -60,8 +59,7 @@ public interface QueryParserConstants {
|
|||
"\"^\"",
|
||||
"<QUOTED>",
|
||||
"<TERM>",
|
||||
"\"~\"",
|
||||
"<SLOP>",
|
||||
"<FUZZY_SLOP>",
|
||||
"<PREFIXTERM>",
|
||||
"<WILDTERM>",
|
||||
"\"[\"",
|
||||
|
|
|
@ -54,13 +54,11 @@ private final int jjMoveStringLiteralDfa0_3()
|
|||
case 58:
|
||||
return jjStopAtPos(0, 14);
|
||||
case 91:
|
||||
return jjStopAtPos(0, 22);
|
||||
return jjStopAtPos(0, 21);
|
||||
case 94:
|
||||
return jjStopAtPos(0, 15);
|
||||
case 123:
|
||||
return jjStopAtPos(0, 23);
|
||||
case 126:
|
||||
return jjStartNfaWithStates_3(0, 18, 18);
|
||||
return jjStopAtPos(0, 22);
|
||||
default :
|
||||
return jjMoveNfa_3(0, 0);
|
||||
}
|
||||
|
@ -105,7 +103,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
{
|
||||
int[] nextStates;
|
||||
int startsAt = 0;
|
||||
jjnewStateCnt = 31;
|
||||
jjnewStateCnt = 33;
|
||||
int i = 1;
|
||||
jjstateSet[0] = startState;
|
||||
int j, kind = 0x7fffffff;
|
||||
|
@ -169,56 +167,67 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
case 18:
|
||||
if ((0x3ff000000000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 19)
|
||||
kind = 19;
|
||||
jjstateSet[jjnewStateCnt++] = 18;
|
||||
if (kind > 18)
|
||||
kind = 18;
|
||||
jjAddStates(7, 8);
|
||||
break;
|
||||
case 19:
|
||||
if (curChar == 46)
|
||||
jjCheckNAdd(20);
|
||||
break;
|
||||
case 20:
|
||||
if ((0x3ff000000000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 18)
|
||||
kind = 18;
|
||||
jjCheckNAdd(20);
|
||||
break;
|
||||
case 21:
|
||||
if ((0x7bffd0f8ffffd9ffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 17)
|
||||
kind = 17;
|
||||
jjCheckNAddStates(0, 6);
|
||||
break;
|
||||
case 20:
|
||||
case 22:
|
||||
if ((0x7bfff8f8ffffd9ffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 17)
|
||||
kind = 17;
|
||||
jjCheckNAddTwoStates(20, 21);
|
||||
jjCheckNAddTwoStates(22, 23);
|
||||
break;
|
||||
case 22:
|
||||
case 24:
|
||||
if ((0x84002f0600000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 17)
|
||||
kind = 17;
|
||||
jjCheckNAddTwoStates(20, 21);
|
||||
jjCheckNAddTwoStates(22, 23);
|
||||
break;
|
||||
case 23:
|
||||
case 25:
|
||||
if ((0x7bfff8f8ffffd9ffL & l) != 0L)
|
||||
jjCheckNAddStates(7, 9);
|
||||
break;
|
||||
case 24:
|
||||
if (curChar == 42 && kind > 20)
|
||||
kind = 20;
|
||||
jjCheckNAddStates(9, 11);
|
||||
break;
|
||||
case 26:
|
||||
if ((0x84002f0600000000L & l) != 0L)
|
||||
jjCheckNAddStates(7, 9);
|
||||
if (curChar == 42 && kind > 19)
|
||||
kind = 19;
|
||||
break;
|
||||
case 27:
|
||||
if ((0xfbfffcf8ffffd9ffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 21)
|
||||
kind = 21;
|
||||
jjCheckNAddTwoStates(27, 28);
|
||||
case 28:
|
||||
if ((0x84002f0600000000L & l) != 0L)
|
||||
jjCheckNAddStates(9, 11);
|
||||
break;
|
||||
case 29:
|
||||
if ((0xfbfffcf8ffffd9ffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 20)
|
||||
kind = 20;
|
||||
jjCheckNAddTwoStates(29, 30);
|
||||
break;
|
||||
case 31:
|
||||
if ((0x84002f0600000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 21)
|
||||
kind = 21;
|
||||
jjCheckNAddTwoStates(27, 28);
|
||||
if (kind > 20)
|
||||
kind = 20;
|
||||
jjCheckNAddTwoStates(29, 30);
|
||||
break;
|
||||
default : break;
|
||||
}
|
||||
|
@ -239,9 +248,13 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
jjCheckNAddStates(0, 6);
|
||||
}
|
||||
else if (curChar == 126)
|
||||
{
|
||||
if (kind > 18)
|
||||
kind = 18;
|
||||
jjstateSet[jjnewStateCnt++] = 18;
|
||||
}
|
||||
if (curChar == 92)
|
||||
jjCheckNAddStates(10, 12);
|
||||
jjCheckNAddStates(12, 14);
|
||||
else if (curChar == 78)
|
||||
jjstateSet[jjnewStateCnt++] = 11;
|
||||
else if (curChar == 124)
|
||||
|
@ -292,70 +305,73 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
jjstateSet[jjnewStateCnt++] = 11;
|
||||
break;
|
||||
case 15:
|
||||
jjAddStates(13, 14);
|
||||
jjAddStates(15, 16);
|
||||
break;
|
||||
case 17:
|
||||
if (curChar == 126)
|
||||
jjstateSet[jjnewStateCnt++] = 18;
|
||||
if (curChar != 126)
|
||||
break;
|
||||
if (kind > 18)
|
||||
kind = 18;
|
||||
jjstateSet[jjnewStateCnt++] = 18;
|
||||
break;
|
||||
case 19:
|
||||
case 21:
|
||||
if ((0x97ffffff97ffffffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 17)
|
||||
kind = 17;
|
||||
jjCheckNAddStates(0, 6);
|
||||
break;
|
||||
case 20:
|
||||
case 22:
|
||||
if ((0x97ffffff97ffffffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 17)
|
||||
kind = 17;
|
||||
jjCheckNAddTwoStates(20, 21);
|
||||
break;
|
||||
case 21:
|
||||
if (curChar == 92)
|
||||
jjCheckNAddTwoStates(22, 22);
|
||||
break;
|
||||
case 22:
|
||||
if ((0x6800000078000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 17)
|
||||
kind = 17;
|
||||
jjCheckNAddTwoStates(20, 21);
|
||||
jjCheckNAddTwoStates(22, 23);
|
||||
break;
|
||||
case 23:
|
||||
if ((0x97ffffff97ffffffL & l) != 0L)
|
||||
jjCheckNAddStates(7, 9);
|
||||
break;
|
||||
case 25:
|
||||
if (curChar == 92)
|
||||
jjCheckNAddTwoStates(26, 26);
|
||||
jjCheckNAddTwoStates(24, 24);
|
||||
break;
|
||||
case 26:
|
||||
if ((0x6800000078000000L & l) != 0L)
|
||||
jjCheckNAddStates(7, 9);
|
||||
break;
|
||||
case 27:
|
||||
if ((0x97ffffff97ffffffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 21)
|
||||
kind = 21;
|
||||
jjCheckNAddTwoStates(27, 28);
|
||||
break;
|
||||
case 28:
|
||||
if (curChar == 92)
|
||||
jjCheckNAddTwoStates(29, 29);
|
||||
break;
|
||||
case 29:
|
||||
case 24:
|
||||
if ((0x6800000078000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 21)
|
||||
kind = 21;
|
||||
jjCheckNAddTwoStates(27, 28);
|
||||
if (kind > 17)
|
||||
kind = 17;
|
||||
jjCheckNAddTwoStates(22, 23);
|
||||
break;
|
||||
case 25:
|
||||
if ((0x97ffffff97ffffffL & l) != 0L)
|
||||
jjCheckNAddStates(9, 11);
|
||||
break;
|
||||
case 27:
|
||||
if (curChar == 92)
|
||||
jjCheckNAddTwoStates(28, 28);
|
||||
break;
|
||||
case 28:
|
||||
if ((0x6800000078000000L & l) != 0L)
|
||||
jjCheckNAddStates(9, 11);
|
||||
break;
|
||||
case 29:
|
||||
if ((0x97ffffff97ffffffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 20)
|
||||
kind = 20;
|
||||
jjCheckNAddTwoStates(29, 30);
|
||||
break;
|
||||
case 30:
|
||||
if (curChar == 92)
|
||||
jjCheckNAddStates(10, 12);
|
||||
jjCheckNAddTwoStates(31, 31);
|
||||
break;
|
||||
case 31:
|
||||
if ((0x6800000078000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 20)
|
||||
kind = 20;
|
||||
jjCheckNAddTwoStates(29, 30);
|
||||
break;
|
||||
case 32:
|
||||
if (curChar == 92)
|
||||
jjCheckNAddStates(12, 14);
|
||||
break;
|
||||
default : break;
|
||||
}
|
||||
|
@ -381,25 +397,25 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
break;
|
||||
case 15:
|
||||
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||
jjAddStates(13, 14);
|
||||
jjAddStates(15, 16);
|
||||
break;
|
||||
case 20:
|
||||
case 22:
|
||||
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||
break;
|
||||
if (kind > 17)
|
||||
kind = 17;
|
||||
jjCheckNAddTwoStates(20, 21);
|
||||
jjCheckNAddTwoStates(22, 23);
|
||||
break;
|
||||
case 23:
|
||||
case 25:
|
||||
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||
jjCheckNAddStates(7, 9);
|
||||
jjCheckNAddStates(9, 11);
|
||||
break;
|
||||
case 27:
|
||||
case 29:
|
||||
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||
break;
|
||||
if (kind > 21)
|
||||
kind = 21;
|
||||
jjCheckNAddTwoStates(27, 28);
|
||||
if (kind > 20)
|
||||
kind = 20;
|
||||
jjCheckNAddTwoStates(29, 30);
|
||||
break;
|
||||
default : break;
|
||||
}
|
||||
|
@ -412,7 +428,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
kind = 0x7fffffff;
|
||||
}
|
||||
++curPos;
|
||||
if ((i = jjnewStateCnt) == (startsAt = 31 - (jjnewStateCnt = startsAt)))
|
||||
if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt)))
|
||||
return curPos;
|
||||
try { curChar = input_stream.readChar(); }
|
||||
catch(java.io.IOException e) { return curPos; }
|
||||
|
@ -423,9 +439,9 @@ private final int jjStopStringLiteralDfa_1(int pos, long active0)
|
|||
switch (pos)
|
||||
{
|
||||
case 0:
|
||||
if ((active0 & 0x20000000L) != 0L)
|
||||
if ((active0 & 0x10000000L) != 0L)
|
||||
{
|
||||
jjmatchedKind = 32;
|
||||
jjmatchedKind = 31;
|
||||
return 4;
|
||||
}
|
||||
return -1;
|
||||
|
@ -450,9 +466,9 @@ private final int jjMoveStringLiteralDfa0_1()
|
|||
switch(curChar)
|
||||
{
|
||||
case 84:
|
||||
return jjMoveStringLiteralDfa1_1(0x20000000L);
|
||||
return jjMoveStringLiteralDfa1_1(0x10000000L);
|
||||
case 125:
|
||||
return jjStopAtPos(0, 30);
|
||||
return jjStopAtPos(0, 29);
|
||||
default :
|
||||
return jjMoveNfa_1(0, 0);
|
||||
}
|
||||
|
@ -467,8 +483,8 @@ private final int jjMoveStringLiteralDfa1_1(long active0)
|
|||
switch(curChar)
|
||||
{
|
||||
case 79:
|
||||
if ((active0 & 0x20000000L) != 0L)
|
||||
return jjStartNfaWithStates_1(1, 29, 4);
|
||||
if ((active0 & 0x10000000L) != 0L)
|
||||
return jjStartNfaWithStates_1(1, 28, 4);
|
||||
break;
|
||||
default :
|
||||
break;
|
||||
|
@ -497,8 +513,8 @@ private final int jjMoveNfa_1(int startState, int curPos)
|
|||
case 0:
|
||||
if ((0xfffffffeffffffffL & l) != 0L)
|
||||
{
|
||||
if (kind > 32)
|
||||
kind = 32;
|
||||
if (kind > 31)
|
||||
kind = 31;
|
||||
jjCheckNAdd(4);
|
||||
}
|
||||
if ((0x100002600L & l) != 0L)
|
||||
|
@ -518,14 +534,14 @@ private final int jjMoveNfa_1(int startState, int curPos)
|
|||
jjCheckNAddTwoStates(2, 3);
|
||||
break;
|
||||
case 3:
|
||||
if (curChar == 34 && kind > 31)
|
||||
kind = 31;
|
||||
if (curChar == 34 && kind > 30)
|
||||
kind = 30;
|
||||
break;
|
||||
case 4:
|
||||
if ((0xfffffffeffffffffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 32)
|
||||
kind = 32;
|
||||
if (kind > 31)
|
||||
kind = 31;
|
||||
jjCheckNAdd(4);
|
||||
break;
|
||||
default : break;
|
||||
|
@ -543,12 +559,12 @@ private final int jjMoveNfa_1(int startState, int curPos)
|
|||
case 4:
|
||||
if ((0xdfffffffffffffffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 32)
|
||||
kind = 32;
|
||||
if (kind > 31)
|
||||
kind = 31;
|
||||
jjCheckNAdd(4);
|
||||
break;
|
||||
case 2:
|
||||
jjAddStates(15, 16);
|
||||
jjAddStates(17, 18);
|
||||
break;
|
||||
default : break;
|
||||
}
|
||||
|
@ -569,13 +585,13 @@ private final int jjMoveNfa_1(int startState, int curPos)
|
|||
case 4:
|
||||
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||
break;
|
||||
if (kind > 32)
|
||||
kind = 32;
|
||||
if (kind > 31)
|
||||
kind = 31;
|
||||
jjCheckNAdd(4);
|
||||
break;
|
||||
case 2:
|
||||
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||
jjAddStates(15, 16);
|
||||
jjAddStates(17, 18);
|
||||
break;
|
||||
default : break;
|
||||
}
|
||||
|
@ -620,9 +636,9 @@ private final int jjMoveNfa_0(int startState, int curPos)
|
|||
case 0:
|
||||
if ((0x3ff000000000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 24)
|
||||
kind = 24;
|
||||
jjAddStates(17, 18);
|
||||
if (kind > 23)
|
||||
kind = 23;
|
||||
jjAddStates(19, 20);
|
||||
break;
|
||||
case 1:
|
||||
if (curChar == 46)
|
||||
|
@ -631,8 +647,8 @@ private final int jjMoveNfa_0(int startState, int curPos)
|
|||
case 2:
|
||||
if ((0x3ff000000000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 24)
|
||||
kind = 24;
|
||||
if (kind > 23)
|
||||
kind = 23;
|
||||
jjCheckNAdd(2);
|
||||
break;
|
||||
default : break;
|
||||
|
@ -683,9 +699,9 @@ private final int jjStopStringLiteralDfa_2(int pos, long active0)
|
|||
switch (pos)
|
||||
{
|
||||
case 0:
|
||||
if ((active0 & 0x2000000L) != 0L)
|
||||
if ((active0 & 0x1000000L) != 0L)
|
||||
{
|
||||
jjmatchedKind = 28;
|
||||
jjmatchedKind = 27;
|
||||
return 4;
|
||||
}
|
||||
return -1;
|
||||
|
@ -710,9 +726,9 @@ private final int jjMoveStringLiteralDfa0_2()
|
|||
switch(curChar)
|
||||
{
|
||||
case 84:
|
||||
return jjMoveStringLiteralDfa1_2(0x2000000L);
|
||||
return jjMoveStringLiteralDfa1_2(0x1000000L);
|
||||
case 93:
|
||||
return jjStopAtPos(0, 26);
|
||||
return jjStopAtPos(0, 25);
|
||||
default :
|
||||
return jjMoveNfa_2(0, 0);
|
||||
}
|
||||
|
@ -727,8 +743,8 @@ private final int jjMoveStringLiteralDfa1_2(long active0)
|
|||
switch(curChar)
|
||||
{
|
||||
case 79:
|
||||
if ((active0 & 0x2000000L) != 0L)
|
||||
return jjStartNfaWithStates_2(1, 25, 4);
|
||||
if ((active0 & 0x1000000L) != 0L)
|
||||
return jjStartNfaWithStates_2(1, 24, 4);
|
||||
break;
|
||||
default :
|
||||
break;
|
||||
|
@ -757,8 +773,8 @@ private final int jjMoveNfa_2(int startState, int curPos)
|
|||
case 0:
|
||||
if ((0xfffffffeffffffffL & l) != 0L)
|
||||
{
|
||||
if (kind > 28)
|
||||
kind = 28;
|
||||
if (kind > 27)
|
||||
kind = 27;
|
||||
jjCheckNAdd(4);
|
||||
}
|
||||
if ((0x100002600L & l) != 0L)
|
||||
|
@ -778,14 +794,14 @@ private final int jjMoveNfa_2(int startState, int curPos)
|
|||
jjCheckNAddTwoStates(2, 3);
|
||||
break;
|
||||
case 3:
|
||||
if (curChar == 34 && kind > 27)
|
||||
kind = 27;
|
||||
if (curChar == 34 && kind > 26)
|
||||
kind = 26;
|
||||
break;
|
||||
case 4:
|
||||
if ((0xfffffffeffffffffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 28)
|
||||
kind = 28;
|
||||
if (kind > 27)
|
||||
kind = 27;
|
||||
jjCheckNAdd(4);
|
||||
break;
|
||||
default : break;
|
||||
|
@ -803,12 +819,12 @@ private final int jjMoveNfa_2(int startState, int curPos)
|
|||
case 4:
|
||||
if ((0xffffffffdfffffffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 28)
|
||||
kind = 28;
|
||||
if (kind > 27)
|
||||
kind = 27;
|
||||
jjCheckNAdd(4);
|
||||
break;
|
||||
case 2:
|
||||
jjAddStates(15, 16);
|
||||
jjAddStates(17, 18);
|
||||
break;
|
||||
default : break;
|
||||
}
|
||||
|
@ -829,13 +845,13 @@ private final int jjMoveNfa_2(int startState, int curPos)
|
|||
case 4:
|
||||
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||
break;
|
||||
if (kind > 28)
|
||||
kind = 28;
|
||||
if (kind > 27)
|
||||
kind = 27;
|
||||
jjCheckNAdd(4);
|
||||
break;
|
||||
case 2:
|
||||
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||
jjAddStates(15, 16);
|
||||
jjAddStates(17, 18);
|
||||
break;
|
||||
default : break;
|
||||
}
|
||||
|
@ -855,8 +871,8 @@ private final int jjMoveNfa_2(int startState, int curPos)
|
|||
}
|
||||
}
|
||||
static final int[] jjnextStates = {
|
||||
20, 23, 24, 27, 28, 25, 21, 23, 24, 25, 22, 26, 29, 15, 16, 2,
|
||||
3, 0, 1,
|
||||
22, 25, 26, 29, 30, 27, 23, 18, 19, 25, 26, 27, 24, 28, 31, 15,
|
||||
16, 2, 3, 0, 1,
|
||||
};
|
||||
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
|
||||
{
|
||||
|
@ -872,8 +888,8 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo
|
|||
}
|
||||
public static final String[] jjstrLiteralImages = {
|
||||
"", null, null, null, null, null, null, null, null, null, "\53", "\55", "\50",
|
||||
"\51", "\72", "\136", null, null, "\176", null, null, null, "\133", "\173", null,
|
||||
"\124\117", "\135", null, null, "\124\117", "\175", null, null, };
|
||||
"\51", "\72", "\136", null, null, null, null, null, "\133", "\173", null, "\124\117",
|
||||
"\135", null, null, "\124\117", "\175", null, null, };
|
||||
public static final String[] lexStateNames = {
|
||||
"Boost",
|
||||
"RangeEx",
|
||||
|
@ -881,18 +897,18 @@ public static final String[] lexStateNames = {
|
|||
"DEFAULT",
|
||||
};
|
||||
public static final int[] jjnewLexState = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, 2, 1, 3,
|
||||
-1, 3, -1, -1, -1, 3, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 3, -1,
|
||||
3, -1, -1, -1, 3, -1, -1,
|
||||
};
|
||||
static final long[] jjtoToken = {
|
||||
0x1ffffff81L,
|
||||
0xffffff81L,
|
||||
};
|
||||
static final long[] jjtoSkip = {
|
||||
0x40L,
|
||||
};
|
||||
protected CharStream input_stream;
|
||||
private final int[] jjrounds = new int[31];
|
||||
private final int[] jjstateSet = new int[62];
|
||||
private final int[] jjrounds = new int[33];
|
||||
private final int[] jjstateSet = new int[66];
|
||||
protected char curChar;
|
||||
public QueryParserTokenManager(CharStream stream)
|
||||
{
|
||||
|
@ -914,7 +930,7 @@ private final void ReInitRounds()
|
|||
{
|
||||
int i;
|
||||
jjround = 0x80000001;
|
||||
for (i = 31; i-- > 0;)
|
||||
for (i = 33; i-- > 0;)
|
||||
jjrounds[i] = 0x80000000;
|
||||
}
|
||||
public void ReInit(CharStream stream, int lexState)
|
||||
|
|
|
@ -25,11 +25,15 @@ import java.io.IOException;
|
|||
*/
|
||||
public final class FuzzyQuery extends MultiTermQuery {
|
||||
|
||||
public final static float defaultMinSimilarity = 0.5f;
|
||||
private float minimumSimilarity;
|
||||
private int prefixLength;
|
||||
|
||||
/**
|
||||
* Create a new FuzzyQuery that will match terms with a similarity
|
||||
* of at least <code>minimumSimilarity</code> to <code>term</code>.
|
||||
* If a <code>prefixLength</code> > 0 is specified, a common prefix
|
||||
* of that length is also required.
|
||||
*
|
||||
* @param term the term to search for
|
||||
* @param minimumSimilarity a value between 0 and 1 to set the required similarity
|
||||
|
@ -37,29 +41,45 @@ public final class FuzzyQuery extends MultiTermQuery {
|
|||
* <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
|
||||
* as the query term is considered similar to the query term if the edit distance
|
||||
* between both terms is less than <code>length(term)*0.5</code>.
|
||||
* @param prefixLength length of common prefix.
|
||||
* @throws IllegalArgumentException if minimumSimilarity is > 1 or < 0
|
||||
* or if prefixLength < 0 or > <code>term.text().length()</code>.
|
||||
*/
|
||||
public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException {
|
||||
public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) throws IllegalArgumentException {
|
||||
super(term);
|
||||
|
||||
if (minimumSimilarity > 1.0f)
|
||||
throw new IllegalArgumentException("minimumSimilarity > 1");
|
||||
else if (minimumSimilarity < 0.0f)
|
||||
throw new IllegalArgumentException("minimumSimilarity < 0");
|
||||
this.minimumSimilarity = minimumSimilarity;
|
||||
|
||||
if(prefixLength < 0)
|
||||
throw new IllegalArgumentException("prefixLength < 0");
|
||||
else if(prefixLength >= term.text().length())
|
||||
throw new IllegalArgumentException("prefixLength >= term.text().length()");
|
||||
this.prefixLength = prefixLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}.
|
||||
*/
|
||||
public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException {
|
||||
this(term, minimumSimilarity, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f)}.
|
||||
* Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}.
|
||||
*/
|
||||
public FuzzyQuery(Term term) {
|
||||
this(term, 0.5f);
|
||||
this(term, defaultMinSimilarity, 0);
|
||||
}
|
||||
|
||||
protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
|
||||
return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity);
|
||||
return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
|
||||
}
|
||||
|
||||
public String toString(String field) {
|
||||
return super.toString(field) + '~';
|
||||
return super.toString(field) + '~' + Float.toString(minimumSimilarity);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,15 +32,49 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
|
|||
String field = "";
|
||||
String text = "";
|
||||
int textlen;
|
||||
String prefix = "";
|
||||
int prefixLength = 0;
|
||||
float minimumSimilarity;
|
||||
double scale_factor;
|
||||
|
||||
|
||||
/**
|
||||
* Empty prefix and minSimilarity of 0.5f are used.
|
||||
*
|
||||
* @param reader
|
||||
* @param term
|
||||
* @throws IOException
|
||||
* @see #FuzzyTermEnum(IndexReader, Term, float, int)
|
||||
*/
|
||||
public FuzzyTermEnum(IndexReader reader, Term term) throws IOException {
|
||||
this(reader, term, 0.5f);
|
||||
this(reader, term, FuzzyQuery.defaultMinSimilarity, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is the standard FuzzyTermEnum with an empty prefix.
|
||||
*
|
||||
* @param reader
|
||||
* @param term
|
||||
* @param minSimilarity
|
||||
* @throws IOException
|
||||
* @see #FuzzyTermEnum(IndexReader, Term, float, int)
|
||||
*/
|
||||
public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) throws IOException {
|
||||
this(reader, term, minSimilarity, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
|
||||
* length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity >
|
||||
* <code>minSimilarity</code>.
|
||||
*
|
||||
* @param reader Delivers terms.
|
||||
* @param term Pattern term.
|
||||
* @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f.
|
||||
* @param prefixLength Length of required common prefix. Default value is 0.
|
||||
* @throws IOException
|
||||
*/
|
||||
public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength) throws IOException {
|
||||
super();
|
||||
minimumSimilarity = minSimilarity;
|
||||
scale_factor = 1.0f / (1.0f - minimumSimilarity);
|
||||
|
@ -48,7 +82,13 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
|
|||
field = searchTerm.field();
|
||||
text = searchTerm.text();
|
||||
textlen = text.length();
|
||||
setEnum(reader.terms(new Term(searchTerm.field(), "")));
|
||||
if(prefixLength > 0 && prefixLength < textlen){
|
||||
this.prefixLength = prefixLength;
|
||||
prefix = text.substring(0, prefixLength);
|
||||
text = text.substring(prefixLength);
|
||||
textlen = text.length();
|
||||
}
|
||||
setEnum(reader.terms(new Term(searchTerm.field(), prefix)));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -56,8 +96,9 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
|
|||
calculate the distance between the given term and the comparing term.
|
||||
*/
|
||||
protected final boolean termCompare(Term term) {
|
||||
if (field == term.field()) {
|
||||
String target = term.text();
|
||||
String termText = term.text();
|
||||
if (field == term.field() && termText.startsWith(prefix)) {
|
||||
String target = termText.substring(prefixLength);
|
||||
int targetlen = target.length();
|
||||
int dist = editDistance(text, target, textlen, targetlen);
|
||||
distance = 1 - ((double)dist / (double)Math.min(textlen, targetlen));
|
||||
|
|
Loading…
Reference in New Issue