diff --git a/src/java/org/apache/lucene/queryParser/QueryParser.java b/src/java/org/apache/lucene/queryParser/QueryParser.java index 13d6d99f95d..04367a7bbd5 100644 --- a/src/java/org/apache/lucene/queryParser/QueryParser.java +++ b/src/java/org/apache/lucene/queryParser/QueryParser.java @@ -73,6 +73,7 @@ public class QueryParser implements QueryParserConstants { Analyzer analyzer; String field; int phraseSlop = 0; + float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; Locale locale = Locale.getDefault(); /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. @@ -129,6 +130,19 @@ public class QueryParser implements QueryParserConstants { return field; } + /** + * Get the default minimal similarity for fuzzy queries. + */ + public float getFuzzyMinSim() { + return fuzzyMinSim; + } + /** + *Set the default minimum similarity for fuzzy queries. + */ + public void setFuzzyMinSim(float fuzzyMinSim) { + this.fuzzyMinSim = fuzzyMinSim; + } + /** * Sets the default slop for phrases. If zero, then exact phrase matches * are required. Default value is zero. @@ -416,10 +430,10 @@ public class QueryParser implements QueryParserConstants { * @return Resulting {@link Query} built for the term * @exception ParseException throw in overridden method to disallow */ - protected Query getFuzzyQuery(String field, String termStr) throws ParseException + protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { Term t = new Term(field, termStr); - return new FuzzyQuery(t); + return new FuzzyQuery(t, minSimilarity); } /** @@ -622,7 +636,7 @@ public class QueryParser implements QueryParserConstants { } final public Query Term(String field) throws ParseException { - Token term, boost=null, slop=null, goop1, goop2; + Token term, boost=null, fuzzySlop=null, goop1, goop2; boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; @@ -654,9 +668,9 @@ public class QueryParser implements QueryParserConstants { throw new ParseException(); } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case FUZZY: - jj_consume_token(FUZZY); - fuzzy=true; + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + fuzzy=true; break; default: jj_la1[8] = jj_gen; @@ -667,9 +681,9 @@ public class QueryParser implements QueryParserConstants { jj_consume_token(CARAT); boost = jj_consume_token(NUMBER); switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case FUZZY: - jj_consume_token(FUZZY); - fuzzy=true; + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + fuzzy=true; break; default: jj_la1[9] = jj_gen; @@ -688,7 +702,11 @@ public class QueryParser implements QueryParserConstants { discardEscapeChar(term.image.substring (0, term.image.length()-1))); } else if (fuzzy) { - q = getFuzzyQuery(field, termImage); + float fms = fuzzyMinSim; + try { + fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); + } catch (Exception ignored) { } + q = getFuzzyQuery(field, termImage, fms); } else { q = getFieldQuery(field, termImage); } @@ -809,8 +827,8 @@ public class QueryParser implements QueryParserConstants { case QUOTED: term = jj_consume_token(QUOTED); switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case SLOP: - slop = jj_consume_token(SLOP); + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); break; default: jj_la1[19] = jj_gen; @@ -827,9 +845,9 @@ public class QueryParser implements QueryParserConstants { } int s = phraseSlop; - if (slop != null) { + if (fuzzySlop != null) { try { - s = Float.valueOf(slop.image.substring(1)).intValue(); + s = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); } catch (Exception ignored) { } } @@ -883,16 +901,11 @@ public class QueryParser implements QueryParserConstants { private int jj_gen; final private int[] jj_la1 = new int[22]; static private int[] jj_la1_0; - static private int[] jj_la1_1; static { jj_la1_0(); - jj_la1_1(); } private static void jj_la1_0() { - jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0x1f31f80,0x8000,0x1f31000,0x1320000,0x40000,0x40000,0x8000,0x18000000,0x2000000,0x18000000,0x8000,0x80000000,0x20000000,0x80000000,0x8000,0x80000,0x8000,0x1f30000,}; - } - private static void jj_la1_1() { - jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,}; + jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0xfb1f80,0x8000,0xfb1000,0x9a0000,0x40000,0x40000,0x8000,0xc000000,0x1000000,0xc000000,0x8000,0xc0000000,0x10000000,0xc0000000,0x8000,0x40000,0x8000,0xfb0000,}; } final private JJCalls[] jj_2_rtns = new JJCalls[1]; private boolean jj_rescan = false; @@ -1041,8 +1054,8 @@ public class QueryParser implements QueryParserConstants { public ParseException generateParseException() { jj_expentries.removeAllElements(); - boolean[] la1tokens = new boolean[33]; - for (int i = 0; i < 33; i++) { + boolean[] la1tokens = new boolean[32]; + for (int i = 0; i < 32; i++) { la1tokens[i] = false; } if (jj_kind >= 0) { @@ -1055,13 +1068,10 @@ public class QueryParser implements QueryParserConstants { if ((jj_la1_0[i] & (1< : Boost | | (<_TERM_CHAR>)* > -| -| )+ > +| )+ ( "." (<_NUM_CHAR>)+ )? )? > | (<_TERM_CHAR>)* "*" > | (<_TERM_CHAR> | ( [ "*", "?" ] ))* > @@ -641,7 +654,7 @@ Query Clause(String field) : { Query Term(String field) : { - Token term, boost=null, slop=null, goop1, goop2; + Token term, boost=null, fuzzySlop=null, goop1, goop2; boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; @@ -656,8 +669,8 @@ Query Term(String field) : { | term= { wildcard=true; } | term= ) - [ { fuzzy=true; } ] - [ boost= [ { fuzzy=true; } ] ] + [ fuzzySlop= { fuzzy=true; } ] + [ boost= [ fuzzySlop= { fuzzy=true; } ] ] { String termImage=discardEscapeChar(term.image); if (wildcard) { @@ -667,7 +680,11 @@ Query Term(String field) : { discardEscapeChar(term.image.substring (0, term.image.length()-1))); } else if (fuzzy) { - q = getFuzzyQuery(field, termImage); + float fms = fuzzyMinSim; + try { + fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); + } catch (Exception ignored) { } + q = getFuzzyQuery(field, termImage, fms); } else { q = getFieldQuery(field, termImage); } @@ -708,14 +725,14 @@ Query Term(String field) : { q = getRangeQuery(field, goop1.image, goop2.image, false); } | term= - [ slop= ] + [ fuzzySlop= ] [ boost= ] { int s = phraseSlop; - if (slop != null) { + if (fuzzySlop != null) { try { - s = Float.valueOf(slop.image.substring(1)).intValue(); + s = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); } catch (Exception ignored) { } } diff --git a/src/java/org/apache/lucene/queryParser/QueryParserConstants.java b/src/java/org/apache/lucene/queryParser/QueryParserConstants.java index f95074e3d5a..e74067f60ce 100644 --- a/src/java/org/apache/lucene/queryParser/QueryParserConstants.java +++ b/src/java/org/apache/lucene/queryParser/QueryParserConstants.java @@ -20,21 +20,20 @@ public interface QueryParserConstants { int CARAT = 15; int QUOTED = 16; int TERM = 17; - int FUZZY = 18; - int SLOP = 19; - int PREFIXTERM = 20; - int WILDTERM = 21; - int RANGEIN_START = 22; - int RANGEEX_START = 23; - int NUMBER = 24; - int RANGEIN_TO = 25; - int RANGEIN_END = 26; - int RANGEIN_QUOTED = 27; - int RANGEIN_GOOP = 28; - int RANGEEX_TO = 29; - int RANGEEX_END = 30; - int RANGEEX_QUOTED = 31; - int RANGEEX_GOOP = 32; + int FUZZY_SLOP = 18; + int PREFIXTERM = 19; + int WILDTERM = 20; + int RANGEIN_START = 21; + int RANGEEX_START = 22; + int NUMBER = 23; + int RANGEIN_TO = 24; + int RANGEIN_END = 25; + int RANGEIN_QUOTED = 26; + int RANGEIN_GOOP = 27; + int RANGEEX_TO = 28; + int RANGEEX_END = 29; + int RANGEEX_QUOTED = 30; + int RANGEEX_GOOP = 31; int Boost = 0; int RangeEx = 1; @@ -60,8 +59,7 @@ public interface QueryParserConstants { "\"^\"", "", "", - "\"~\"", - "", + "", "", "", "\"[\"", diff --git a/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java b/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java index c4202da6091..22de31f50bc 100644 --- a/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java +++ b/src/java/org/apache/lucene/queryParser/QueryParserTokenManager.java @@ -54,13 +54,11 @@ private final int jjMoveStringLiteralDfa0_3() case 58: return jjStopAtPos(0, 14); case 91: - return jjStopAtPos(0, 22); + return jjStopAtPos(0, 21); case 94: return jjStopAtPos(0, 15); case 123: - return jjStopAtPos(0, 23); - case 126: - return jjStartNfaWithStates_3(0, 18, 18); + return jjStopAtPos(0, 22); default : return jjMoveNfa_3(0, 0); } @@ -105,7 +103,7 @@ private final int jjMoveNfa_3(int startState, int curPos) { int[] nextStates; int startsAt = 0; - jjnewStateCnt = 31; + jjnewStateCnt = 33; int i = 1; jjstateSet[0] = startState; int j, kind = 0x7fffffff; @@ -169,56 +167,67 @@ private final int jjMoveNfa_3(int startState, int curPos) case 18: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 19) - kind = 19; - jjstateSet[jjnewStateCnt++] = 18; + if (kind > 18) + kind = 18; + jjAddStates(7, 8); break; case 19: + if (curChar == 46) + jjCheckNAdd(20); + break; + case 20: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 18) + kind = 18; + jjCheckNAdd(20); + break; + case 21: if ((0x7bffd0f8ffffd9ffL & l) == 0L) break; if (kind > 17) kind = 17; jjCheckNAddStates(0, 6); break; - case 20: + case 22: if ((0x7bfff8f8ffffd9ffL & l) == 0L) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(20, 21); + jjCheckNAddTwoStates(22, 23); break; - case 22: + case 24: if ((0x84002f0600000000L & l) == 0L) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(20, 21); + jjCheckNAddTwoStates(22, 23); break; - case 23: + case 25: if ((0x7bfff8f8ffffd9ffL & l) != 0L) - jjCheckNAddStates(7, 9); - break; - case 24: - if (curChar == 42 && kind > 20) - kind = 20; + jjCheckNAddStates(9, 11); break; case 26: - if ((0x84002f0600000000L & l) != 0L) - jjCheckNAddStates(7, 9); + if (curChar == 42 && kind > 19) + kind = 19; break; - case 27: - if ((0xfbfffcf8ffffd9ffL & l) == 0L) - break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(27, 28); + case 28: + if ((0x84002f0600000000L & l) != 0L) + jjCheckNAddStates(9, 11); break; case 29: + if ((0xfbfffcf8ffffd9ffL & l) == 0L) + break; + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); + break; + case 31: if ((0x84002f0600000000L & l) == 0L) break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(27, 28); + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); break; default : break; } @@ -239,9 +248,13 @@ private final int jjMoveNfa_3(int startState, int curPos) jjCheckNAddStates(0, 6); } else if (curChar == 126) + { + if (kind > 18) + kind = 18; jjstateSet[jjnewStateCnt++] = 18; + } if (curChar == 92) - jjCheckNAddStates(10, 12); + jjCheckNAddStates(12, 14); else if (curChar == 78) jjstateSet[jjnewStateCnt++] = 11; else if (curChar == 124) @@ -292,70 +305,73 @@ private final int jjMoveNfa_3(int startState, int curPos) jjstateSet[jjnewStateCnt++] = 11; break; case 15: - jjAddStates(13, 14); + jjAddStates(15, 16); break; case 17: - if (curChar == 126) - jjstateSet[jjnewStateCnt++] = 18; + if (curChar != 126) + break; + if (kind > 18) + kind = 18; + jjstateSet[jjnewStateCnt++] = 18; break; - case 19: + case 21: if ((0x97ffffff97ffffffL & l) == 0L) break; if (kind > 17) kind = 17; jjCheckNAddStates(0, 6); break; - case 20: + case 22: if ((0x97ffffff97ffffffL & l) == 0L) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(20, 21); - break; - case 21: - if (curChar == 92) - jjCheckNAddTwoStates(22, 22); - break; - case 22: - if ((0x6800000078000000L & l) == 0L) - break; - if (kind > 17) - kind = 17; - jjCheckNAddTwoStates(20, 21); + jjCheckNAddTwoStates(22, 23); break; case 23: - if ((0x97ffffff97ffffffL & l) != 0L) - jjCheckNAddStates(7, 9); - break; - case 25: if (curChar == 92) - jjCheckNAddTwoStates(26, 26); + jjCheckNAddTwoStates(24, 24); break; - case 26: - if ((0x6800000078000000L & l) != 0L) - jjCheckNAddStates(7, 9); - break; - case 27: - if ((0x97ffffff97ffffffL & l) == 0L) - break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(27, 28); - break; - case 28: - if (curChar == 92) - jjCheckNAddTwoStates(29, 29); - break; - case 29: + case 24: if ((0x6800000078000000L & l) == 0L) break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(27, 28); + if (kind > 17) + kind = 17; + jjCheckNAddTwoStates(22, 23); + break; + case 25: + if ((0x97ffffff97ffffffL & l) != 0L) + jjCheckNAddStates(9, 11); + break; + case 27: + if (curChar == 92) + jjCheckNAddTwoStates(28, 28); + break; + case 28: + if ((0x6800000078000000L & l) != 0L) + jjCheckNAddStates(9, 11); + break; + case 29: + if ((0x97ffffff97ffffffL & l) == 0L) + break; + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); break; case 30: if (curChar == 92) - jjCheckNAddStates(10, 12); + jjCheckNAddTwoStates(31, 31); + break; + case 31: + if ((0x6800000078000000L & l) == 0L) + break; + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); + break; + case 32: + if (curChar == 92) + jjCheckNAddStates(12, 14); break; default : break; } @@ -381,25 +397,25 @@ private final int jjMoveNfa_3(int startState, int curPos) break; case 15: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(13, 14); + jjAddStates(15, 16); break; - case 20: + case 22: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; if (kind > 17) kind = 17; - jjCheckNAddTwoStates(20, 21); + jjCheckNAddTwoStates(22, 23); break; - case 23: + case 25: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(7, 9); + jjCheckNAddStates(9, 11); break; - case 27: + case 29: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; - if (kind > 21) - kind = 21; - jjCheckNAddTwoStates(27, 28); + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); break; default : break; } @@ -412,7 +428,7 @@ private final int jjMoveNfa_3(int startState, int curPos) kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 31 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { return curPos; } @@ -423,9 +439,9 @@ private final int jjStopStringLiteralDfa_1(int pos, long active0) switch (pos) { case 0: - if ((active0 & 0x20000000L) != 0L) + if ((active0 & 0x10000000L) != 0L) { - jjmatchedKind = 32; + jjmatchedKind = 31; return 4; } return -1; @@ -450,9 +466,9 @@ private final int jjMoveStringLiteralDfa0_1() switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_1(0x20000000L); + return jjMoveStringLiteralDfa1_1(0x10000000L); case 125: - return jjStopAtPos(0, 30); + return jjStopAtPos(0, 29); default : return jjMoveNfa_1(0, 0); } @@ -467,8 +483,8 @@ private final int jjMoveStringLiteralDfa1_1(long active0) switch(curChar) { case 79: - if ((active0 & 0x20000000L) != 0L) - return jjStartNfaWithStates_1(1, 29, 4); + if ((active0 & 0x10000000L) != 0L) + return jjStartNfaWithStates_1(1, 28, 4); break; default : break; @@ -497,8 +513,8 @@ private final int jjMoveNfa_1(int startState, int curPos) case 0: if ((0xfffffffeffffffffL & l) != 0L) { - if (kind > 32) - kind = 32; + if (kind > 31) + kind = 31; jjCheckNAdd(4); } if ((0x100002600L & l) != 0L) @@ -518,14 +534,14 @@ private final int jjMoveNfa_1(int startState, int curPos) jjCheckNAddTwoStates(2, 3); break; case 3: - if (curChar == 34 && kind > 31) - kind = 31; + if (curChar == 34 && kind > 30) + kind = 30; break; case 4: if ((0xfffffffeffffffffL & l) == 0L) break; - if (kind > 32) - kind = 32; + if (kind > 31) + kind = 31; jjCheckNAdd(4); break; default : break; @@ -543,12 +559,12 @@ private final int jjMoveNfa_1(int startState, int curPos) case 4: if ((0xdfffffffffffffffL & l) == 0L) break; - if (kind > 32) - kind = 32; + if (kind > 31) + kind = 31; jjCheckNAdd(4); break; case 2: - jjAddStates(15, 16); + jjAddStates(17, 18); break; default : break; } @@ -569,13 +585,13 @@ private final int jjMoveNfa_1(int startState, int curPos) case 4: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; - if (kind > 32) - kind = 32; + if (kind > 31) + kind = 31; jjCheckNAdd(4); break; case 2: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(15, 16); + jjAddStates(17, 18); break; default : break; } @@ -620,9 +636,9 @@ private final int jjMoveNfa_0(int startState, int curPos) case 0: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 24) - kind = 24; - jjAddStates(17, 18); + if (kind > 23) + kind = 23; + jjAddStates(19, 20); break; case 1: if (curChar == 46) @@ -631,8 +647,8 @@ private final int jjMoveNfa_0(int startState, int curPos) case 2: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 24) - kind = 24; + if (kind > 23) + kind = 23; jjCheckNAdd(2); break; default : break; @@ -683,9 +699,9 @@ private final int jjStopStringLiteralDfa_2(int pos, long active0) switch (pos) { case 0: - if ((active0 & 0x2000000L) != 0L) + if ((active0 & 0x1000000L) != 0L) { - jjmatchedKind = 28; + jjmatchedKind = 27; return 4; } return -1; @@ -710,9 +726,9 @@ private final int jjMoveStringLiteralDfa0_2() switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_2(0x2000000L); + return jjMoveStringLiteralDfa1_2(0x1000000L); case 93: - return jjStopAtPos(0, 26); + return jjStopAtPos(0, 25); default : return jjMoveNfa_2(0, 0); } @@ -727,8 +743,8 @@ private final int jjMoveStringLiteralDfa1_2(long active0) switch(curChar) { case 79: - if ((active0 & 0x2000000L) != 0L) - return jjStartNfaWithStates_2(1, 25, 4); + if ((active0 & 0x1000000L) != 0L) + return jjStartNfaWithStates_2(1, 24, 4); break; default : break; @@ -757,8 +773,8 @@ private final int jjMoveNfa_2(int startState, int curPos) case 0: if ((0xfffffffeffffffffL & l) != 0L) { - if (kind > 28) - kind = 28; + if (kind > 27) + kind = 27; jjCheckNAdd(4); } if ((0x100002600L & l) != 0L) @@ -778,14 +794,14 @@ private final int jjMoveNfa_2(int startState, int curPos) jjCheckNAddTwoStates(2, 3); break; case 3: - if (curChar == 34 && kind > 27) - kind = 27; + if (curChar == 34 && kind > 26) + kind = 26; break; case 4: if ((0xfffffffeffffffffL & l) == 0L) break; - if (kind > 28) - kind = 28; + if (kind > 27) + kind = 27; jjCheckNAdd(4); break; default : break; @@ -803,12 +819,12 @@ private final int jjMoveNfa_2(int startState, int curPos) case 4: if ((0xffffffffdfffffffL & l) == 0L) break; - if (kind > 28) - kind = 28; + if (kind > 27) + kind = 27; jjCheckNAdd(4); break; case 2: - jjAddStates(15, 16); + jjAddStates(17, 18); break; default : break; } @@ -829,13 +845,13 @@ private final int jjMoveNfa_2(int startState, int curPos) case 4: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; - if (kind > 28) - kind = 28; + if (kind > 27) + kind = 27; jjCheckNAdd(4); break; case 2: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(15, 16); + jjAddStates(17, 18); break; default : break; } @@ -855,8 +871,8 @@ private final int jjMoveNfa_2(int startState, int curPos) } } static final int[] jjnextStates = { - 20, 23, 24, 27, 28, 25, 21, 23, 24, 25, 22, 26, 29, 15, 16, 2, - 3, 0, 1, + 22, 25, 26, 29, 30, 27, 23, 18, 19, 25, 26, 27, 24, 28, 31, 15, + 16, 2, 3, 0, 1, }; private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) { @@ -872,8 +888,8 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo } public static final String[] jjstrLiteralImages = { "", null, null, null, null, null, null, null, null, null, "\53", "\55", "\50", -"\51", "\72", "\136", null, null, "\176", null, null, null, "\133", "\173", null, -"\124\117", "\135", null, null, "\124\117", "\175", null, null, }; +"\51", "\72", "\136", null, null, null, null, null, "\133", "\173", null, "\124\117", +"\135", null, null, "\124\117", "\175", null, null, }; public static final String[] lexStateNames = { "Boost", "RangeEx", @@ -881,18 +897,18 @@ public static final String[] lexStateNames = { "DEFAULT", }; public static final int[] jjnewLexState = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, 2, 1, 3, - -1, 3, -1, -1, -1, 3, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 3, -1, + 3, -1, -1, -1, 3, -1, -1, }; static final long[] jjtoToken = { - 0x1ffffff81L, + 0xffffff81L, }; static final long[] jjtoSkip = { 0x40L, }; protected CharStream input_stream; -private final int[] jjrounds = new int[31]; -private final int[] jjstateSet = new int[62]; +private final int[] jjrounds = new int[33]; +private final int[] jjstateSet = new int[66]; protected char curChar; public QueryParserTokenManager(CharStream stream) { @@ -914,7 +930,7 @@ private final void ReInitRounds() { int i; jjround = 0x80000001; - for (i = 31; i-- > 0;) + for (i = 33; i-- > 0;) jjrounds[i] = 0x80000000; } public void ReInit(CharStream stream, int lexState) diff --git a/src/java/org/apache/lucene/search/FuzzyQuery.java b/src/java/org/apache/lucene/search/FuzzyQuery.java index 7332bfb8965..8f0d7a19be9 100644 --- a/src/java/org/apache/lucene/search/FuzzyQuery.java +++ b/src/java/org/apache/lucene/search/FuzzyQuery.java @@ -25,11 +25,15 @@ import java.io.IOException; */ public final class FuzzyQuery extends MultiTermQuery { + public final static float defaultMinSimilarity = 0.5f; private float minimumSimilarity; + private int prefixLength; /** * Create a new FuzzyQuery that will match terms with a similarity * of at least minimumSimilarity to term. + * If a prefixLength > 0 is specified, a common prefix + * of that length is also required. * * @param term the term to search for * @param minimumSimilarity a value between 0 and 1 to set the required similarity @@ -37,29 +41,45 @@ public final class FuzzyQuery extends MultiTermQuery { * minimumSimilarity of 0.5 a term of the same length * as the query term is considered similar to the query term if the edit distance * between both terms is less than length(term)*0.5. + * @param prefixLength length of common prefix. * @throws IllegalArgumentException if minimumSimilarity is > 1 or < 0 + * or if prefixLength < 0 or > term.text().length(). */ - public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException { + public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) throws IllegalArgumentException { super(term); + if (minimumSimilarity > 1.0f) throw new IllegalArgumentException("minimumSimilarity > 1"); else if (minimumSimilarity < 0.0f) throw new IllegalArgumentException("minimumSimilarity < 0"); this.minimumSimilarity = minimumSimilarity; + + if(prefixLength < 0) + throw new IllegalArgumentException("prefixLength < 0"); + else if(prefixLength >= term.text().length()) + throw new IllegalArgumentException("prefixLength >= term.text().length()"); + this.prefixLength = prefixLength; + } + + /** + * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}. + */ + public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException { + this(term, minimumSimilarity, 0); } /** - * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f)}. + * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}. */ public FuzzyQuery(Term term) { - this(term, 0.5f); + this(term, defaultMinSimilarity, 0); } protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity); + return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength); } public String toString(String field) { - return super.toString(field) + '~'; + return super.toString(field) + '~' + Float.toString(minimumSimilarity); } } diff --git a/src/java/org/apache/lucene/search/FuzzyTermEnum.java b/src/java/org/apache/lucene/search/FuzzyTermEnum.java index 49a97f33b40..7ede17243b8 100644 --- a/src/java/org/apache/lucene/search/FuzzyTermEnum.java +++ b/src/java/org/apache/lucene/search/FuzzyTermEnum.java @@ -32,15 +32,49 @@ public final class FuzzyTermEnum extends FilteredTermEnum { String field = ""; String text = ""; int textlen; + String prefix = ""; + int prefixLength = 0; float minimumSimilarity; double scale_factor; + /** + * Empty prefix and minSimilarity of 0.5f are used. + * + * @param reader + * @param term + * @throws IOException + * @see #FuzzyTermEnum(IndexReader, Term, float, int) + */ public FuzzyTermEnum(IndexReader reader, Term term) throws IOException { - this(reader, term, 0.5f); + this(reader, term, FuzzyQuery.defaultMinSimilarity, 0); } + /** + * This is the standard FuzzyTermEnum with an empty prefix. + * + * @param reader + * @param term + * @param minSimilarity + * @throws IOException + * @see #FuzzyTermEnum(IndexReader, Term, float, int) + */ public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) throws IOException { + this(reader, term, minSimilarity, 0); + } + + /** + * Constructor for enumeration of all terms from specified reader which share a prefix of + * length prefixLength with term and which have a fuzzy similarity > + * minSimilarity. + * + * @param reader Delivers terms. + * @param term Pattern term. + * @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5f. + * @param prefixLength Length of required common prefix. Default value is 0. + * @throws IOException + */ + public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength) throws IOException { super(); minimumSimilarity = minSimilarity; scale_factor = 1.0f / (1.0f - minimumSimilarity); @@ -48,7 +82,13 @@ public final class FuzzyTermEnum extends FilteredTermEnum { field = searchTerm.field(); text = searchTerm.text(); textlen = text.length(); - setEnum(reader.terms(new Term(searchTerm.field(), ""))); + if(prefixLength > 0 && prefixLength < textlen){ + this.prefixLength = prefixLength; + prefix = text.substring(0, prefixLength); + text = text.substring(prefixLength); + textlen = text.length(); + } + setEnum(reader.terms(new Term(searchTerm.field(), prefix))); } /** @@ -56,8 +96,9 @@ public final class FuzzyTermEnum extends FilteredTermEnum { calculate the distance between the given term and the comparing term. */ protected final boolean termCompare(Term term) { - if (field == term.field()) { - String target = term.text(); + String termText = term.text(); + if (field == term.field() && termText.startsWith(prefix)) { + String target = termText.substring(prefixLength); int targetlen = target.length(); int dist = editDistance(text, target, textlen, targetlen); distance = 1 - ((double)dist / (double)Math.min(textlen, targetlen));