mirror of https://github.com/apache/lucene.git
LUCENE-800: removed backslash from the TERM_CHAR list in the QueryParser to support escaping of backslashes
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@515908 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
35c0da7cab
commit
397187494f
|
@ -1,4 +1,4 @@
|
|||
Lucene Change Log
|
||||
Lucene Change Log
|
||||
|
||||
$Id$
|
||||
|
||||
|
@ -48,6 +48,11 @@ Bug fixes
|
|||
a FileNotFoundException like Lucene pre-2.1 (before this fix you
|
||||
got an NPE). (Mike McCandless)
|
||||
|
||||
7. LUCENE-800: Removed backslash from the TERM_CHAR list in the queryparser,
|
||||
because the backslash is the escape character. Also changed the ESCAPED_CHAR
|
||||
list to contain all possible characters, because every character that
|
||||
follows a backslash should be considered as escaped. (Michael Busch)
|
||||
|
||||
New features
|
||||
|
||||
1. LUCENE-759: Added two n-gram-producing TokenFilters.
|
||||
|
|
|
@ -794,7 +794,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
StringBuffer sb = new StringBuffer();
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
char c = s.charAt(i);
|
||||
// NOTE: keep this in sync with _ESCAPED_CHAR below!
|
||||
// These characters are part of the query syntax and must be escaped
|
||||
if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
|
||||
|| c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
|
||||
|| c == '*' || c == '?') {
|
||||
|
|
|
@ -818,7 +818,7 @@ public class QueryParser {
|
|||
StringBuffer sb = new StringBuffer();
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
char c = s.charAt(i);
|
||||
// NOTE: keep this in sync with _ESCAPED_CHAR below!
|
||||
// These characters are part of the query syntax and must be escaped
|
||||
if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
|
||||
|| c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
|
||||
|| c == '*' || c == '?') {
|
||||
|
@ -854,11 +854,10 @@ PARSER_END(QueryParser)
|
|||
|
||||
<*> TOKEN : {
|
||||
<#_NUM_CHAR: ["0"-"9"] >
|
||||
// NOTE: keep this in sync with escape(String) above!
|
||||
| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
|
||||
"[", "]", "\"", "{", "}", "~", "*", "?" ] >
|
||||
// every character that follows a backslash is considered as an escaped character
|
||||
| <#_ESCAPED_CHAR: "\\" ~[] >
|
||||
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
|
||||
"[", "]", "\"", "{", "}", "~", "*", "?" ]
|
||||
"[", "]", "\"", "{", "}", "~", "*", "?", "\\" ]
|
||||
| <_ESCAPED_CHAR> ) >
|
||||
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
|
||||
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
|
||||
|
|
|
@ -220,8 +220,6 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
jjCheckNAddTwoStates(25, 26);
|
||||
break;
|
||||
case 27:
|
||||
if ((0x84002f0600000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 21)
|
||||
kind = 21;
|
||||
jjCheckNAddTwoStates(25, 26);
|
||||
|
@ -241,8 +239,6 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
jjCheckNAddTwoStates(29, 30);
|
||||
break;
|
||||
case 31:
|
||||
if ((0x84002f0600000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 18)
|
||||
kind = 18;
|
||||
jjCheckNAddTwoStates(29, 30);
|
||||
|
@ -252,8 +248,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
jjCheckNAddStates(10, 12);
|
||||
break;
|
||||
case 34:
|
||||
if ((0x84002f0600000000L & l) != 0L)
|
||||
jjCheckNAddStates(10, 12);
|
||||
jjCheckNAddStates(10, 12);
|
||||
break;
|
||||
default : break;
|
||||
}
|
||||
|
@ -267,37 +262,37 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
switch(jjstateSet[--i])
|
||||
{
|
||||
case 36:
|
||||
if ((0x97ffffff97ffffffL & l) != 0L)
|
||||
if ((0x97ffffff87ffffffL & l) != 0L)
|
||||
{
|
||||
if (kind > 21)
|
||||
kind = 21;
|
||||
jjCheckNAddTwoStates(25, 26);
|
||||
}
|
||||
if (curChar == 92)
|
||||
else if (curChar == 92)
|
||||
jjCheckNAddTwoStates(27, 27);
|
||||
break;
|
||||
case 0:
|
||||
if ((0x97ffffff97ffffffL & l) != 0L)
|
||||
if ((0x97ffffff87ffffffL & l) != 0L)
|
||||
{
|
||||
if (kind > 18)
|
||||
kind = 18;
|
||||
jjCheckNAddStates(0, 4);
|
||||
}
|
||||
else if (curChar == 92)
|
||||
jjCheckNAddStates(13, 15);
|
||||
else if (curChar == 126)
|
||||
{
|
||||
if (kind > 19)
|
||||
kind = 19;
|
||||
jjstateSet[jjnewStateCnt++] = 20;
|
||||
}
|
||||
if ((0x97ffffff97ffffffL & l) != 0L)
|
||||
if ((0x97ffffff87ffffffL & l) != 0L)
|
||||
{
|
||||
if (kind > 21)
|
||||
kind = 21;
|
||||
jjCheckNAddTwoStates(25, 26);
|
||||
}
|
||||
if (curChar == 92)
|
||||
jjCheckNAddStates(13, 15);
|
||||
else if (curChar == 78)
|
||||
if (curChar == 78)
|
||||
jjstateSet[jjnewStateCnt++] = 11;
|
||||
else if (curChar == 124)
|
||||
jjstateSet[jjnewStateCnt++] = 8;
|
||||
|
@ -361,14 +356,14 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
jjstateSet[jjnewStateCnt++] = 20;
|
||||
break;
|
||||
case 24:
|
||||
if ((0x97ffffff97ffffffL & l) == 0L)
|
||||
if ((0x97ffffff87ffffffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 21)
|
||||
kind = 21;
|
||||
jjCheckNAddTwoStates(25, 26);
|
||||
break;
|
||||
case 25:
|
||||
if ((0x97ffffff97ffffffL & l) == 0L)
|
||||
if ((0x97ffffff87ffffffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 21)
|
||||
kind = 21;
|
||||
|
@ -379,21 +374,19 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
jjCheckNAddTwoStates(27, 27);
|
||||
break;
|
||||
case 27:
|
||||
if ((0x6800000078000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 21)
|
||||
kind = 21;
|
||||
jjCheckNAddTwoStates(25, 26);
|
||||
break;
|
||||
case 28:
|
||||
if ((0x97ffffff97ffffffL & l) == 0L)
|
||||
if ((0x97ffffff87ffffffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 18)
|
||||
kind = 18;
|
||||
jjCheckNAddStates(0, 4);
|
||||
break;
|
||||
case 29:
|
||||
if ((0x97ffffff97ffffffL & l) == 0L)
|
||||
if ((0x97ffffff87ffffffL & l) == 0L)
|
||||
break;
|
||||
if (kind > 18)
|
||||
kind = 18;
|
||||
|
@ -404,14 +397,12 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
jjCheckNAddTwoStates(31, 31);
|
||||
break;
|
||||
case 31:
|
||||
if ((0x6800000078000000L & l) == 0L)
|
||||
break;
|
||||
if (kind > 18)
|
||||
kind = 18;
|
||||
jjCheckNAddTwoStates(29, 30);
|
||||
break;
|
||||
case 32:
|
||||
if ((0x97ffffff97ffffffL & l) != 0L)
|
||||
if ((0x97ffffff87ffffffL & l) != 0L)
|
||||
jjCheckNAddStates(10, 12);
|
||||
break;
|
||||
case 33:
|
||||
|
@ -419,8 +410,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
jjCheckNAddTwoStates(34, 34);
|
||||
break;
|
||||
case 34:
|
||||
if ((0x6800000078000000L & l) != 0L)
|
||||
jjCheckNAddStates(10, 12);
|
||||
jjCheckNAddStates(10, 12);
|
||||
break;
|
||||
case 35:
|
||||
if (curChar == 92)
|
||||
|
@ -443,6 +433,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
{
|
||||
case 36:
|
||||
case 25:
|
||||
case 27:
|
||||
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||
break;
|
||||
if (kind > 21)
|
||||
|
@ -482,6 +473,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
jjCheckNAddStates(0, 4);
|
||||
break;
|
||||
case 29:
|
||||
case 31:
|
||||
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||
break;
|
||||
if (kind > 18)
|
||||
|
@ -489,6 +481,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
jjCheckNAddTwoStates(29, 30);
|
||||
break;
|
||||
case 32:
|
||||
case 34:
|
||||
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||
jjCheckNAddStates(10, 12);
|
||||
break;
|
||||
|
|
|
@ -522,6 +522,8 @@ public class TestQueryParser extends TestCase {
|
|||
//assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
|
||||
//assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/
|
||||
|
||||
assertQueryEquals("\\a", a, "a");
|
||||
|
||||
assertQueryEquals("a\\-b:c", a, "a-b:c");
|
||||
assertQueryEquals("a\\+b:c", a, "a+b:c");
|
||||
assertQueryEquals("a\\:b:c", a, "a:b:c");
|
||||
|
@ -585,6 +587,15 @@ public class TestQueryParser extends TestCase {
|
|||
assertQueryEquals("XY\\u005", a, "XYZ");
|
||||
fail("ParseException expected, not thrown");
|
||||
} catch (ParseException expected) {}
|
||||
|
||||
// Tests bug LUCENE-800
|
||||
assertQueryEquals("(item:\\\\ item:ABCD\\\\)", a, "item:\\ item:ABCD\\");
|
||||
assertQueryEquals("\\*", a, "*");
|
||||
assertQueryEquals("\\\\", a, "\\"); // escaped backslash
|
||||
try {
|
||||
assertQueryEquals("\\", a, "\\");
|
||||
fail("ParseException expected not thrown (backslash must be escaped)");
|
||||
} catch (ParseException expected) {}
|
||||
}
|
||||
|
||||
public void testQueryStringEscaping() throws Exception {
|
||||
|
|
|
@ -170,13 +170,13 @@ public class TestWildcard
|
|||
QueryParser qp = new QueryParser(field, new WhitespaceAnalyzer());
|
||||
qp.setAllowLeadingWildcard(true);
|
||||
String docs[] = {
|
||||
"abcdefg1",
|
||||
"hijklmn1",
|
||||
"opqrstu1",
|
||||
"\\ abcdefg1",
|
||||
"\\79 hijklmn1",
|
||||
"\\\\ opqrstu1",
|
||||
};
|
||||
// queries that should find all docs
|
||||
String matchAll[] = {
|
||||
"*", "*1", "**1", "*?", "*?1", "?*1", "**", "***",
|
||||
"*", "*1", "**1", "*?", "*?1", "?*1", "**", "***", "\\\\*"
|
||||
};
|
||||
// queries that should find no docs
|
||||
String matchNone[] = {
|
||||
|
@ -184,9 +184,9 @@ public class TestWildcard
|
|||
};
|
||||
// queries that should be parsed to prefix queries
|
||||
String matchOneDocPrefix[][] = {
|
||||
{"a*", "ab*", "abc*"}, // these should find only doc 0
|
||||
{"h*", "hi*", "hij*"}, // these should find only doc 1
|
||||
{"o*", "op*", "opq*"}, // these should find only doc 2
|
||||
{"a*", "ab*", "abc*", }, // these should find only doc 0
|
||||
{"h*", "hi*", "hij*", "\\\\7*"}, // these should find only doc 1
|
||||
{"o*", "op*", "opq*", "\\\\\\\\*"}, // these should find only doc 2
|
||||
};
|
||||
// queries that should be parsed to wildcard queries
|
||||
String matchOneDocWild[][] = {
|
||||
|
@ -200,7 +200,7 @@ public class TestWildcard
|
|||
IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer());
|
||||
for (int i = 0; i < docs.length; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new Field(field,docs[i],Store.NO,Index.UN_TOKENIZED));
|
||||
doc.add(new Field(field,docs[i],Store.NO,Index.TOKENIZED));
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
iw.close();
|
||||
|
|
Loading…
Reference in New Issue