QueryParser can now handle minimumSimilarity parameter

of FuzzyQuery; FuzzyQuery extended to allow for non-fuzzy
prefixes.


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150506 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Christoph Goller 2004-09-14 13:45:15 +00:00
parent 82fc8e874f
commit 7e0155537a
6 changed files with 298 additions and 196 deletions

View File

@ -73,6 +73,7 @@ public class QueryParser implements QueryParserConstants {
Analyzer analyzer;
String field;
int phraseSlop = 0;
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
Locale locale = Locale.getDefault();
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
@ -129,6 +130,19 @@ public class QueryParser implements QueryParserConstants {
return field;
}
/**
* Get the default minimal similarity for fuzzy queries.
*/
public float getFuzzyMinSim() {
return fuzzyMinSim;
}
/**
*Set the default minimum similarity for fuzzy queries.
*/
public void setFuzzyMinSim(float fuzzyMinSim) {
this.fuzzyMinSim = fuzzyMinSim;
}
/**
* Sets the default slop for phrases. If zero, then exact phrase matches
* are required. Default value is zero.
@ -416,10 +430,10 @@ public class QueryParser implements QueryParserConstants {
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFuzzyQuery(String field, String termStr) throws ParseException
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
{
Term t = new Term(field, termStr);
return new FuzzyQuery(t);
return new FuzzyQuery(t, minSimilarity);
}
/**
@ -622,7 +636,7 @@ public class QueryParser implements QueryParserConstants {
}
final public Query Term(String field) throws ParseException {
Token term, boost=null, slop=null, goop1, goop2;
Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
@ -654,9 +668,9 @@ public class QueryParser implements QueryParserConstants {
throw new ParseException();
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case FUZZY:
jj_consume_token(FUZZY);
fuzzy=true;
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
break;
default:
jj_la1[8] = jj_gen;
@ -667,9 +681,9 @@ public class QueryParser implements QueryParserConstants {
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case FUZZY:
jj_consume_token(FUZZY);
fuzzy=true;
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
break;
default:
jj_la1[9] = jj_gen;
@ -688,7 +702,11 @@ public class QueryParser implements QueryParserConstants {
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
} else if (fuzzy) {
q = getFuzzyQuery(field, termImage);
float fms = fuzzyMinSim;
try {
fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
} catch (Exception ignored) { }
q = getFuzzyQuery(field, termImage, fms);
} else {
q = getFieldQuery(field, termImage);
}
@ -809,8 +827,8 @@ public class QueryParser implements QueryParserConstants {
case QUOTED:
term = jj_consume_token(QUOTED);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case SLOP:
slop = jj_consume_token(SLOP);
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
break;
default:
jj_la1[19] = jj_gen;
@ -827,9 +845,9 @@ public class QueryParser implements QueryParserConstants {
}
int s = phraseSlop;
if (slop != null) {
if (fuzzySlop != null) {
try {
s = Float.valueOf(slop.image.substring(1)).intValue();
s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
}
catch (Exception ignored) { }
}
@ -883,16 +901,11 @@ public class QueryParser implements QueryParserConstants {
private int jj_gen;
final private int[] jj_la1 = new int[22];
static private int[] jj_la1_0;
static private int[] jj_la1_1;
static {
jj_la1_0();
jj_la1_1();
}
private static void jj_la1_0() {
jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0x1f31f80,0x8000,0x1f31000,0x1320000,0x40000,0x40000,0x8000,0x18000000,0x2000000,0x18000000,0x8000,0x80000000,0x20000000,0x80000000,0x8000,0x80000,0x8000,0x1f30000,};
}
private static void jj_la1_1() {
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,};
jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0xfb1f80,0x8000,0xfb1000,0x9a0000,0x40000,0x40000,0x8000,0xc000000,0x1000000,0xc000000,0x8000,0xc0000000,0x10000000,0xc0000000,0x8000,0x40000,0x8000,0xfb0000,};
}
final private JJCalls[] jj_2_rtns = new JJCalls[1];
private boolean jj_rescan = false;
@ -1041,8 +1054,8 @@ public class QueryParser implements QueryParserConstants {
public ParseException generateParseException() {
jj_expentries.removeAllElements();
boolean[] la1tokens = new boolean[33];
for (int i = 0; i < 33; i++) {
boolean[] la1tokens = new boolean[32];
for (int i = 0; i < 32; i++) {
la1tokens[i] = false;
}
if (jj_kind >= 0) {
@ -1055,13 +1068,10 @@ public class QueryParser implements QueryParserConstants {
if ((jj_la1_0[i] & (1<<j)) != 0) {
la1tokens[j] = true;
}
if ((jj_la1_1[i] & (1<<j)) != 0) {
la1tokens[32+j] = true;
}
}
}
}
for (int i = 0; i < 33; i++) {
for (int i = 0; i < 32; i++) {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;

View File

@ -96,6 +96,7 @@ public class QueryParser {
Analyzer analyzer;
String field;
int phraseSlop = 0;
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
Locale locale = Locale.getDefault();
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
@ -151,6 +152,19 @@ public class QueryParser {
public String getField() {
return field;
}
/**
* Get the default minimal similarity for fuzzy queries.
*/
public float getFuzzyMinSim() {
return fuzzyMinSim;
}
/**
*Set the default minimum similarity for fuzzy queries.
*/
public void setFuzzyMinSim(float fuzzyMinSim) {
this.fuzzyMinSim = fuzzyMinSim;
}
/**
* Sets the default slop for phrases. If zero, then exact phrase matches
@ -439,10 +453,10 @@ public class QueryParser {
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFuzzyQuery(String field, String termStr) throws ParseException
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
{
Term t = new Term(field, termStr);
return new FuzzyQuery(t);
return new FuzzyQuery(t, minSimilarity);
}
/**
@ -531,8 +545,7 @@ PARSER_END(QueryParser)
| <CARAT: "^" > : Boost
| <QUOTED: "\"" (~["\""])+ "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY: "~" >
| <SLOP: "~" (<_NUM_CHAR>)+ >
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
| <WILDTERM: <_TERM_START_CHAR>
(<_TERM_CHAR> | ( [ "*", "?" ] ))* >
@ -641,7 +654,7 @@ Query Clause(String field) : {
Query Term(String field) : {
Token term, boost=null, slop=null, goop1, goop2;
Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
@ -656,8 +669,8 @@ Query Term(String field) : {
| term=<WILDTERM> { wildcard=true; }
| term=<NUMBER>
)
[ <FUZZY> { fuzzy=true; } ]
[ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ]
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
[ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
{
String termImage=discardEscapeChar(term.image);
if (wildcard) {
@ -667,7 +680,11 @@ Query Term(String field) : {
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
} else if (fuzzy) {
q = getFuzzyQuery(field, termImage);
float fms = fuzzyMinSim;
try {
fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
} catch (Exception ignored) { }
q = getFuzzyQuery(field, termImage, fms);
} else {
q = getFieldQuery(field, termImage);
}
@ -708,14 +725,14 @@ Query Term(String field) : {
q = getRangeQuery(field, goop1.image, goop2.image, false);
}
| term=<QUOTED>
[ slop=<SLOP> ]
[ fuzzySlop=<FUZZY_SLOP> ]
[ <CARAT> boost=<NUMBER> ]
{
int s = phraseSlop;
if (slop != null) {
if (fuzzySlop != null) {
try {
s = Float.valueOf(slop.image.substring(1)).intValue();
s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
}
catch (Exception ignored) { }
}

View File

@ -20,21 +20,20 @@ public interface QueryParserConstants {
int CARAT = 15;
int QUOTED = 16;
int TERM = 17;
int FUZZY = 18;
int SLOP = 19;
int PREFIXTERM = 20;
int WILDTERM = 21;
int RANGEIN_START = 22;
int RANGEEX_START = 23;
int NUMBER = 24;
int RANGEIN_TO = 25;
int RANGEIN_END = 26;
int RANGEIN_QUOTED = 27;
int RANGEIN_GOOP = 28;
int RANGEEX_TO = 29;
int RANGEEX_END = 30;
int RANGEEX_QUOTED = 31;
int RANGEEX_GOOP = 32;
int FUZZY_SLOP = 18;
int PREFIXTERM = 19;
int WILDTERM = 20;
int RANGEIN_START = 21;
int RANGEEX_START = 22;
int NUMBER = 23;
int RANGEIN_TO = 24;
int RANGEIN_END = 25;
int RANGEIN_QUOTED = 26;
int RANGEIN_GOOP = 27;
int RANGEEX_TO = 28;
int RANGEEX_END = 29;
int RANGEEX_QUOTED = 30;
int RANGEEX_GOOP = 31;
int Boost = 0;
int RangeEx = 1;
@ -60,8 +59,7 @@ public interface QueryParserConstants {
"\"^\"",
"<QUOTED>",
"<TERM>",
"\"~\"",
"<SLOP>",
"<FUZZY_SLOP>",
"<PREFIXTERM>",
"<WILDTERM>",
"\"[\"",

View File

@ -54,13 +54,11 @@ private final int jjMoveStringLiteralDfa0_3()
case 58:
return jjStopAtPos(0, 14);
case 91:
return jjStopAtPos(0, 22);
return jjStopAtPos(0, 21);
case 94:
return jjStopAtPos(0, 15);
case 123:
return jjStopAtPos(0, 23);
case 126:
return jjStartNfaWithStates_3(0, 18, 18);
return jjStopAtPos(0, 22);
default :
return jjMoveNfa_3(0, 0);
}
@ -105,7 +103,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
{
int[] nextStates;
int startsAt = 0;
jjnewStateCnt = 31;
jjnewStateCnt = 33;
int i = 1;
jjstateSet[0] = startState;
int j, kind = 0x7fffffff;
@ -169,56 +167,67 @@ private final int jjMoveNfa_3(int startState, int curPos)
case 18:
if ((0x3ff000000000000L & l) == 0L)
break;
if (kind > 19)
kind = 19;
jjstateSet[jjnewStateCnt++] = 18;
if (kind > 18)
kind = 18;
jjAddStates(7, 8);
break;
case 19:
if (curChar == 46)
jjCheckNAdd(20);
break;
case 20:
if ((0x3ff000000000000L & l) == 0L)
break;
if (kind > 18)
kind = 18;
jjCheckNAdd(20);
break;
case 21:
if ((0x7bffd0f8ffffd9ffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddStates(0, 6);
break;
case 20:
case 22:
if ((0x7bfff8f8ffffd9ffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddTwoStates(20, 21);
jjCheckNAddTwoStates(22, 23);
break;
case 22:
case 24:
if ((0x84002f0600000000L & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddTwoStates(20, 21);
jjCheckNAddTwoStates(22, 23);
break;
case 23:
case 25:
if ((0x7bfff8f8ffffd9ffL & l) != 0L)
jjCheckNAddStates(7, 9);
break;
case 24:
if (curChar == 42 && kind > 20)
kind = 20;
jjCheckNAddStates(9, 11);
break;
case 26:
if ((0x84002f0600000000L & l) != 0L)
jjCheckNAddStates(7, 9);
if (curChar == 42 && kind > 19)
kind = 19;
break;
case 27:
if ((0xfbfffcf8ffffd9ffL & l) == 0L)
break;
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(27, 28);
case 28:
if ((0x84002f0600000000L & l) != 0L)
jjCheckNAddStates(9, 11);
break;
case 29:
if ((0xfbfffcf8ffffd9ffL & l) == 0L)
break;
if (kind > 20)
kind = 20;
jjCheckNAddTwoStates(29, 30);
break;
case 31:
if ((0x84002f0600000000L & l) == 0L)
break;
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(27, 28);
if (kind > 20)
kind = 20;
jjCheckNAddTwoStates(29, 30);
break;
default : break;
}
@ -239,9 +248,13 @@ private final int jjMoveNfa_3(int startState, int curPos)
jjCheckNAddStates(0, 6);
}
else if (curChar == 126)
{
if (kind > 18)
kind = 18;
jjstateSet[jjnewStateCnt++] = 18;
}
if (curChar == 92)
jjCheckNAddStates(10, 12);
jjCheckNAddStates(12, 14);
else if (curChar == 78)
jjstateSet[jjnewStateCnt++] = 11;
else if (curChar == 124)
@ -292,70 +305,73 @@ private final int jjMoveNfa_3(int startState, int curPos)
jjstateSet[jjnewStateCnt++] = 11;
break;
case 15:
jjAddStates(13, 14);
jjAddStates(15, 16);
break;
case 17:
if (curChar == 126)
jjstateSet[jjnewStateCnt++] = 18;
if (curChar != 126)
break;
if (kind > 18)
kind = 18;
jjstateSet[jjnewStateCnt++] = 18;
break;
case 19:
case 21:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddStates(0, 6);
break;
case 20:
case 22:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddTwoStates(20, 21);
break;
case 21:
if (curChar == 92)
jjCheckNAddTwoStates(22, 22);
break;
case 22:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 17)
kind = 17;
jjCheckNAddTwoStates(20, 21);
jjCheckNAddTwoStates(22, 23);
break;
case 23:
if ((0x97ffffff97ffffffL & l) != 0L)
jjCheckNAddStates(7, 9);
break;
case 25:
if (curChar == 92)
jjCheckNAddTwoStates(26, 26);
jjCheckNAddTwoStates(24, 24);
break;
case 26:
if ((0x6800000078000000L & l) != 0L)
jjCheckNAddStates(7, 9);
break;
case 27:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(27, 28);
break;
case 28:
if (curChar == 92)
jjCheckNAddTwoStates(29, 29);
break;
case 29:
case 24:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(27, 28);
if (kind > 17)
kind = 17;
jjCheckNAddTwoStates(22, 23);
break;
case 25:
if ((0x97ffffff97ffffffL & l) != 0L)
jjCheckNAddStates(9, 11);
break;
case 27:
if (curChar == 92)
jjCheckNAddTwoStates(28, 28);
break;
case 28:
if ((0x6800000078000000L & l) != 0L)
jjCheckNAddStates(9, 11);
break;
case 29:
if ((0x97ffffff97ffffffL & l) == 0L)
break;
if (kind > 20)
kind = 20;
jjCheckNAddTwoStates(29, 30);
break;
case 30:
if (curChar == 92)
jjCheckNAddStates(10, 12);
jjCheckNAddTwoStates(31, 31);
break;
case 31:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 20)
kind = 20;
jjCheckNAddTwoStates(29, 30);
break;
case 32:
if (curChar == 92)
jjCheckNAddStates(12, 14);
break;
default : break;
}
@ -381,25 +397,25 @@ private final int jjMoveNfa_3(int startState, int curPos)
break;
case 15:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
jjAddStates(13, 14);
jjAddStates(15, 16);
break;
case 20:
case 22:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 17)
kind = 17;
jjCheckNAddTwoStates(20, 21);
jjCheckNAddTwoStates(22, 23);
break;
case 23:
case 25:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
jjCheckNAddStates(7, 9);
jjCheckNAddStates(9, 11);
break;
case 27:
case 29:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(27, 28);
if (kind > 20)
kind = 20;
jjCheckNAddTwoStates(29, 30);
break;
default : break;
}
@ -412,7 +428,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
kind = 0x7fffffff;
}
++curPos;
if ((i = jjnewStateCnt) == (startsAt = 31 - (jjnewStateCnt = startsAt)))
if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt)))
return curPos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return curPos; }
@ -423,9 +439,9 @@ private final int jjStopStringLiteralDfa_1(int pos, long active0)
switch (pos)
{
case 0:
if ((active0 & 0x20000000L) != 0L)
if ((active0 & 0x10000000L) != 0L)
{
jjmatchedKind = 32;
jjmatchedKind = 31;
return 4;
}
return -1;
@ -450,9 +466,9 @@ private final int jjMoveStringLiteralDfa0_1()
switch(curChar)
{
case 84:
return jjMoveStringLiteralDfa1_1(0x20000000L);
return jjMoveStringLiteralDfa1_1(0x10000000L);
case 125:
return jjStopAtPos(0, 30);
return jjStopAtPos(0, 29);
default :
return jjMoveNfa_1(0, 0);
}
@ -467,8 +483,8 @@ private final int jjMoveStringLiteralDfa1_1(long active0)
switch(curChar)
{
case 79:
if ((active0 & 0x20000000L) != 0L)
return jjStartNfaWithStates_1(1, 29, 4);
if ((active0 & 0x10000000L) != 0L)
return jjStartNfaWithStates_1(1, 28, 4);
break;
default :
break;
@ -497,8 +513,8 @@ private final int jjMoveNfa_1(int startState, int curPos)
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
if (kind > 32)
kind = 32;
if (kind > 31)
kind = 31;
jjCheckNAdd(4);
}
if ((0x100002600L & l) != 0L)
@ -518,14 +534,14 @@ private final int jjMoveNfa_1(int startState, int curPos)
jjCheckNAddTwoStates(2, 3);
break;
case 3:
if (curChar == 34 && kind > 31)
kind = 31;
if (curChar == 34 && kind > 30)
kind = 30;
break;
case 4:
if ((0xfffffffeffffffffL & l) == 0L)
break;
if (kind > 32)
kind = 32;
if (kind > 31)
kind = 31;
jjCheckNAdd(4);
break;
default : break;
@ -543,12 +559,12 @@ private final int jjMoveNfa_1(int startState, int curPos)
case 4:
if ((0xdfffffffffffffffL & l) == 0L)
break;
if (kind > 32)
kind = 32;
if (kind > 31)
kind = 31;
jjCheckNAdd(4);
break;
case 2:
jjAddStates(15, 16);
jjAddStates(17, 18);
break;
default : break;
}
@ -569,13 +585,13 @@ private final int jjMoveNfa_1(int startState, int curPos)
case 4:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 32)
kind = 32;
if (kind > 31)
kind = 31;
jjCheckNAdd(4);
break;
case 2:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
jjAddStates(15, 16);
jjAddStates(17, 18);
break;
default : break;
}
@ -620,9 +636,9 @@ private final int jjMoveNfa_0(int startState, int curPos)
case 0:
if ((0x3ff000000000000L & l) == 0L)
break;
if (kind > 24)
kind = 24;
jjAddStates(17, 18);
if (kind > 23)
kind = 23;
jjAddStates(19, 20);
break;
case 1:
if (curChar == 46)
@ -631,8 +647,8 @@ private final int jjMoveNfa_0(int startState, int curPos)
case 2:
if ((0x3ff000000000000L & l) == 0L)
break;
if (kind > 24)
kind = 24;
if (kind > 23)
kind = 23;
jjCheckNAdd(2);
break;
default : break;
@ -683,9 +699,9 @@ private final int jjStopStringLiteralDfa_2(int pos, long active0)
switch (pos)
{
case 0:
if ((active0 & 0x2000000L) != 0L)
if ((active0 & 0x1000000L) != 0L)
{
jjmatchedKind = 28;
jjmatchedKind = 27;
return 4;
}
return -1;
@ -710,9 +726,9 @@ private final int jjMoveStringLiteralDfa0_2()
switch(curChar)
{
case 84:
return jjMoveStringLiteralDfa1_2(0x2000000L);
return jjMoveStringLiteralDfa1_2(0x1000000L);
case 93:
return jjStopAtPos(0, 26);
return jjStopAtPos(0, 25);
default :
return jjMoveNfa_2(0, 0);
}
@ -727,8 +743,8 @@ private final int jjMoveStringLiteralDfa1_2(long active0)
switch(curChar)
{
case 79:
if ((active0 & 0x2000000L) != 0L)
return jjStartNfaWithStates_2(1, 25, 4);
if ((active0 & 0x1000000L) != 0L)
return jjStartNfaWithStates_2(1, 24, 4);
break;
default :
break;
@ -757,8 +773,8 @@ private final int jjMoveNfa_2(int startState, int curPos)
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
{
if (kind > 28)
kind = 28;
if (kind > 27)
kind = 27;
jjCheckNAdd(4);
}
if ((0x100002600L & l) != 0L)
@ -778,14 +794,14 @@ private final int jjMoveNfa_2(int startState, int curPos)
jjCheckNAddTwoStates(2, 3);
break;
case 3:
if (curChar == 34 && kind > 27)
kind = 27;
if (curChar == 34 && kind > 26)
kind = 26;
break;
case 4:
if ((0xfffffffeffffffffL & l) == 0L)
break;
if (kind > 28)
kind = 28;
if (kind > 27)
kind = 27;
jjCheckNAdd(4);
break;
default : break;
@ -803,12 +819,12 @@ private final int jjMoveNfa_2(int startState, int curPos)
case 4:
if ((0xffffffffdfffffffL & l) == 0L)
break;
if (kind > 28)
kind = 28;
if (kind > 27)
kind = 27;
jjCheckNAdd(4);
break;
case 2:
jjAddStates(15, 16);
jjAddStates(17, 18);
break;
default : break;
}
@ -829,13 +845,13 @@ private final int jjMoveNfa_2(int startState, int curPos)
case 4:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 28)
kind = 28;
if (kind > 27)
kind = 27;
jjCheckNAdd(4);
break;
case 2:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
jjAddStates(15, 16);
jjAddStates(17, 18);
break;
default : break;
}
@ -855,8 +871,8 @@ private final int jjMoveNfa_2(int startState, int curPos)
}
}
static final int[] jjnextStates = {
20, 23, 24, 27, 28, 25, 21, 23, 24, 25, 22, 26, 29, 15, 16, 2,
3, 0, 1,
22, 25, 26, 29, 30, 27, 23, 18, 19, 25, 26, 27, 24, 28, 31, 15,
16, 2, 3, 0, 1,
};
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
{
@ -872,8 +888,8 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo
}
public static final String[] jjstrLiteralImages = {
"", null, null, null, null, null, null, null, null, null, "\53", "\55", "\50",
"\51", "\72", "\136", null, null, "\176", null, null, null, "\133", "\173", null,
"\124\117", "\135", null, null, "\124\117", "\175", null, null, };
"\51", "\72", "\136", null, null, null, null, null, "\133", "\173", null, "\124\117",
"\135", null, null, "\124\117", "\175", null, null, };
public static final String[] lexStateNames = {
"Boost",
"RangeEx",
@ -881,18 +897,18 @@ public static final String[] lexStateNames = {
"DEFAULT",
};
public static final int[] jjnewLexState = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, 2, 1, 3,
-1, 3, -1, -1, -1, 3, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 3, -1,
3, -1, -1, -1, 3, -1, -1,
};
static final long[] jjtoToken = {
0x1ffffff81L,
0xffffff81L,
};
static final long[] jjtoSkip = {
0x40L,
};
protected CharStream input_stream;
private final int[] jjrounds = new int[31];
private final int[] jjstateSet = new int[62];
private final int[] jjrounds = new int[33];
private final int[] jjstateSet = new int[66];
protected char curChar;
public QueryParserTokenManager(CharStream stream)
{
@ -914,7 +930,7 @@ private final void ReInitRounds()
{
int i;
jjround = 0x80000001;
for (i = 31; i-- > 0;)
for (i = 33; i-- > 0;)
jjrounds[i] = 0x80000000;
}
public void ReInit(CharStream stream, int lexState)

View File

@ -25,11 +25,15 @@ import java.io.IOException;
*/
public final class FuzzyQuery extends MultiTermQuery {
public final static float defaultMinSimilarity = 0.5f;
private float minimumSimilarity;
private int prefixLength;
/**
* Create a new FuzzyQuery that will match terms with a similarity
* of at least <code>minimumSimilarity</code> to <code>term</code>.
* If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
* of that length is also required.
*
* @param term the term to search for
* @param minimumSimilarity a value between 0 and 1 to set the required similarity
@ -37,29 +41,45 @@ public final class FuzzyQuery extends MultiTermQuery {
* <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
* as the query term is considered similar to the query term if the edit distance
* between both terms is less than <code>length(term)*0.5</code>.
* @param prefixLength length of common prefix.
* @throws IllegalArgumentException if minimumSimilarity is &gt; 1 or &lt; 0
* or if prefixLength &lt; 0 or &gt; <code>term.text().length()</code>.
*/
public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException {
public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) throws IllegalArgumentException {
super(term);
if (minimumSimilarity > 1.0f)
throw new IllegalArgumentException("minimumSimilarity > 1");
else if (minimumSimilarity < 0.0f)
throw new IllegalArgumentException("minimumSimilarity < 0");
this.minimumSimilarity = minimumSimilarity;
if(prefixLength < 0)
throw new IllegalArgumentException("prefixLength < 0");
else if(prefixLength >= term.text().length())
throw new IllegalArgumentException("prefixLength >= term.text().length()");
this.prefixLength = prefixLength;
}
/**
* Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}.
*/
public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException {
this(term, minimumSimilarity, 0);
}
/**
* Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f)}.
* Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}.
*/
public FuzzyQuery(Term term) {
this(term, 0.5f);
this(term, defaultMinSimilarity, 0);
}
protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity);
return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
}
public String toString(String field) {
return super.toString(field) + '~';
return super.toString(field) + '~' + Float.toString(minimumSimilarity);
}
}

View File

@ -32,15 +32,49 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
String field = "";
String text = "";
int textlen;
String prefix = "";
int prefixLength = 0;
float minimumSimilarity;
double scale_factor;
/**
* Empty prefix and minSimilarity of 0.5f are used.
*
* @param reader
* @param term
* @throws IOException
* @see #FuzzyTermEnum(IndexReader, Term, float, int)
*/
public FuzzyTermEnum(IndexReader reader, Term term) throws IOException {
this(reader, term, 0.5f);
this(reader, term, FuzzyQuery.defaultMinSimilarity, 0);
}
/**
* This is the standard FuzzyTermEnum with an empty prefix.
*
* @param reader
* @param term
* @param minSimilarity
* @throws IOException
* @see #FuzzyTermEnum(IndexReader, Term, float, int)
*/
public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) throws IOException {
this(reader, term, minSimilarity, 0);
}
/**
* Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
* length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity &gt;
* <code>minSimilarity</code>.
*
* @param reader Delivers terms.
* @param term Pattern term.
* @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f.
* @param prefixLength Length of required common prefix. Default value is 0.
* @throws IOException
*/
public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength) throws IOException {
super();
minimumSimilarity = minSimilarity;
scale_factor = 1.0f / (1.0f - minimumSimilarity);
@ -48,7 +82,13 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
field = searchTerm.field();
text = searchTerm.text();
textlen = text.length();
setEnum(reader.terms(new Term(searchTerm.field(), "")));
if(prefixLength > 0 && prefixLength < textlen){
this.prefixLength = prefixLength;
prefix = text.substring(0, prefixLength);
text = text.substring(prefixLength);
textlen = text.length();
}
setEnum(reader.terms(new Term(searchTerm.field(), prefix)));
}
/**
@ -56,8 +96,9 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
calculate the distance between the given term and the comparing term.
*/
protected final boolean termCompare(Term term) {
if (field == term.field()) {
String target = term.text();
String termText = term.text();
if (field == term.field() && termText.startsWith(prefix)) {
String target = termText.substring(prefixLength);
int targetlen = target.length();
int dist = editDistance(text, target, textlen, targetlen);
distance = 1 - ((double)dist / (double)Math.min(textlen, targetlen));