LUCENE-800: removed backslash from the TERM_CHAR list in the QueryParser to support escaping of backslashes

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@515908 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Busch 2007-03-08 03:37:12 +00:00
parent 35c0da7cab
commit 397187494f
6 changed files with 47 additions and 39 deletions

View File

@ -1,4 +1,4 @@
Lucene Change Log
Lucene Change Log
$Id$
@ -48,6 +48,11 @@ Bug fixes
a FileNotFoundException like Lucene pre-2.1 (before this fix you
got an NPE). (Mike McCandless)
7. LUCENE-800: Removed backslash from the TERM_CHAR list in the QueryParser,
because the backslash is the escape character. Also changed the ESCAPED_CHAR
list to contain all possible characters, because every character that
follows a backslash should be treated as escaped. (Michael Busch)
New features
1. LUCENE-759: Added two n-gram-producing TokenFilters.

View File

@ -794,7 +794,7 @@ public class QueryParser implements QueryParserConstants {
StringBuffer sb = new StringBuffer();
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
// NOTE: keep this in sync with _ESCAPED_CHAR below!
// These characters are part of the query syntax and must be escaped
if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
|| c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
|| c == '*' || c == '?') {

View File

@ -818,7 +818,7 @@ public class QueryParser {
StringBuffer sb = new StringBuffer();
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
// NOTE: keep this in sync with _ESCAPED_CHAR below!
// These characters are part of the query syntax and must be escaped
if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
|| c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
|| c == '*' || c == '?') {
@ -854,11 +854,10 @@ PARSER_END(QueryParser)
<*> TOKEN : {
<#_NUM_CHAR: ["0"-"9"] >
// NOTE: keep this in sync with escape(String) above!
| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
"[", "]", "\"", "{", "}", "~", "*", "?" ] >
// every character that follows a backslash is considered as an escaped character
| <#_ESCAPED_CHAR: "\\" ~[] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
"[", "]", "\"", "{", "}", "~", "*", "?" ]
"[", "]", "\"", "{", "}", "~", "*", "?", "\\" ]
| <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >

View File

@ -220,8 +220,6 @@ private final int jjMoveNfa_3(int startState, int curPos)
jjCheckNAddTwoStates(25, 26);
break;
case 27:
if ((0x84002f0600000000L & l) == 0L)
break;
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(25, 26);
@ -241,8 +239,6 @@ private final int jjMoveNfa_3(int startState, int curPos)
jjCheckNAddTwoStates(29, 30);
break;
case 31:
if ((0x84002f0600000000L & l) == 0L)
break;
if (kind > 18)
kind = 18;
jjCheckNAddTwoStates(29, 30);
@ -252,8 +248,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
jjCheckNAddStates(10, 12);
break;
case 34:
if ((0x84002f0600000000L & l) != 0L)
jjCheckNAddStates(10, 12);
jjCheckNAddStates(10, 12);
break;
default : break;
}
@ -267,37 +262,37 @@ private final int jjMoveNfa_3(int startState, int curPos)
switch(jjstateSet[--i])
{
case 36:
if ((0x97ffffff97ffffffL & l) != 0L)
if ((0x97ffffff87ffffffL & l) != 0L)
{
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(25, 26);
}
if (curChar == 92)
else if (curChar == 92)
jjCheckNAddTwoStates(27, 27);
break;
case 0:
if ((0x97ffffff97ffffffL & l) != 0L)
if ((0x97ffffff87ffffffL & l) != 0L)
{
if (kind > 18)
kind = 18;
jjCheckNAddStates(0, 4);
}
else if (curChar == 92)
jjCheckNAddStates(13, 15);
else if (curChar == 126)
{
if (kind > 19)
kind = 19;
jjstateSet[jjnewStateCnt++] = 20;
}
if ((0x97ffffff97ffffffL & l) != 0L)
if ((0x97ffffff87ffffffL & l) != 0L)
{
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(25, 26);
}
if (curChar == 92)
jjCheckNAddStates(13, 15);
else if (curChar == 78)
if (curChar == 78)
jjstateSet[jjnewStateCnt++] = 11;
else if (curChar == 124)
jjstateSet[jjnewStateCnt++] = 8;
@ -361,14 +356,14 @@ private final int jjMoveNfa_3(int startState, int curPos)
jjstateSet[jjnewStateCnt++] = 20;
break;
case 24:
if ((0x97ffffff97ffffffL & l) == 0L)
if ((0x97ffffff87ffffffL & l) == 0L)
break;
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(25, 26);
break;
case 25:
if ((0x97ffffff97ffffffL & l) == 0L)
if ((0x97ffffff87ffffffL & l) == 0L)
break;
if (kind > 21)
kind = 21;
@ -379,21 +374,19 @@ private final int jjMoveNfa_3(int startState, int curPos)
jjCheckNAddTwoStates(27, 27);
break;
case 27:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 21)
kind = 21;
jjCheckNAddTwoStates(25, 26);
break;
case 28:
if ((0x97ffffff97ffffffL & l) == 0L)
if ((0x97ffffff87ffffffL & l) == 0L)
break;
if (kind > 18)
kind = 18;
jjCheckNAddStates(0, 4);
break;
case 29:
if ((0x97ffffff97ffffffL & l) == 0L)
if ((0x97ffffff87ffffffL & l) == 0L)
break;
if (kind > 18)
kind = 18;
@ -404,14 +397,12 @@ private final int jjMoveNfa_3(int startState, int curPos)
jjCheckNAddTwoStates(31, 31);
break;
case 31:
if ((0x6800000078000000L & l) == 0L)
break;
if (kind > 18)
kind = 18;
jjCheckNAddTwoStates(29, 30);
break;
case 32:
if ((0x97ffffff97ffffffL & l) != 0L)
if ((0x97ffffff87ffffffL & l) != 0L)
jjCheckNAddStates(10, 12);
break;
case 33:
@ -419,8 +410,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
jjCheckNAddTwoStates(34, 34);
break;
case 34:
if ((0x6800000078000000L & l) != 0L)
jjCheckNAddStates(10, 12);
jjCheckNAddStates(10, 12);
break;
case 35:
if (curChar == 92)
@ -443,6 +433,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
{
case 36:
case 25:
case 27:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 21)
@ -482,6 +473,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
jjCheckNAddStates(0, 4);
break;
case 29:
case 31:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 18)
@ -489,6 +481,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
jjCheckNAddTwoStates(29, 30);
break;
case 32:
case 34:
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
jjCheckNAddStates(10, 12);
break;

View File

@ -522,6 +522,8 @@ public class TestQueryParser extends TestCase {
//assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
//assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/
assertQueryEquals("\\a", a, "a");
assertQueryEquals("a\\-b:c", a, "a-b:c");
assertQueryEquals("a\\+b:c", a, "a+b:c");
assertQueryEquals("a\\:b:c", a, "a:b:c");
@ -585,6 +587,15 @@ public class TestQueryParser extends TestCase {
assertQueryEquals("XY\\u005", a, "XYZ");
fail("ParseException expected, not thrown");
} catch (ParseException expected) {}
// Tests bug LUCENE-800
assertQueryEquals("(item:\\\\ item:ABCD\\\\)", a, "item:\\ item:ABCD\\");
assertQueryEquals("\\*", a, "*");
assertQueryEquals("\\\\", a, "\\"); // escaped backslash
try {
assertQueryEquals("\\", a, "\\");
fail("ParseException expected not thrown (backslash must be escaped)");
} catch (ParseException expected) {}
}
public void testQueryStringEscaping() throws Exception {

View File

@ -170,13 +170,13 @@ public class TestWildcard
QueryParser qp = new QueryParser(field, new WhitespaceAnalyzer());
qp.setAllowLeadingWildcard(true);
String docs[] = {
"abcdefg1",
"hijklmn1",
"opqrstu1",
"\\ abcdefg1",
"\\79 hijklmn1",
"\\\\ opqrstu1",
};
// queries that should find all docs
String matchAll[] = {
"*", "*1", "**1", "*?", "*?1", "?*1", "**", "***",
"*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*"
};
// queries that should find no docs
String matchNone[] = {
@ -184,9 +184,9 @@ public class TestWildcard
};
// queries that should be parsed to prefix queries
String matchOneDocPrefix[][] = {
{"a*", "ab*", "abc*"}, // these should find only doc 0
{"h*", "hi*", "hij*"}, // these should find only doc 1
{"o*", "op*", "opq*"}, // these should find only doc 2
{"a*", "ab*", "abc*", }, // these should find only doc 0
{"h*", "hi*", "hij*", "\\\\7*"}, // these should find only doc 1
{"o*", "op*", "opq*", "\\\\\\\\*"}, // these should find only doc 2
};
// queries that should be parsed to wildcard queries
String matchOneDocWild[][] = {
@ -200,7 +200,7 @@ public class TestWildcard
IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer());
for (int i = 0; i < docs.length; i++) {
Document doc = new Document();
doc.add(new Field(field,docs[i],Store.NO,Index.UN_TOKENIZED));
doc.add(new Field(field,docs[i],Store.NO,Index.TOKENIZED));
iw.addDocument(doc);
}
iw.close();