mirror of https://github.com/apache/lucene.git
Parse escaped brackets and spaces in range queries
This commit is contained in:
parent
4f8035c013
commit
eb7b1e4bbd
|
@ -1,9 +1,9 @@
|
||||||
{
|
{
|
||||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java": "7a8a8fd5b2ea78f9a17f54cbae8b0e4496e8988e",
|
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java": "7a8a8fd5b2ea78f9a17f54cbae8b0e4496e8988e",
|
||||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java": "a2b7d21092d21cbac290cb1ddde5ac161824fb83",
|
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java": "a2b7d21092d21cbac290cb1ddde5ac161824fb83",
|
||||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj": "ed9f248e1a48cadeeab8f0a79e77e986e34ff721",
|
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj": "64bc53b09c0665afd281f1d85a2db802e5f75266",
|
||||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserConstants.java": "e59a3fd38b66a3d56779c55955c1e014225a1f50",
|
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserConstants.java": "e59a3fd38b66a3d56779c55955c1e014225a1f50",
|
||||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java": "dc99a1083bfa50e429d40e114fabe7dd5d434693",
|
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java": "b83ebcc6c97618f1986573cb294d6180f1943f77",
|
||||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java": "310665ba37d982327fcb55cc3523d629ef29ef54",
|
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java": "310665ba37d982327fcb55cc3523d629ef29ef54",
|
||||||
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java": "7e2dd6ab7489048bb70f3077ca9fed90f925ec33"
|
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java": "7e2dd6ab7489048bb70f3077ca9fed90f925ec33"
|
||||||
}
|
}
|
|
@ -223,7 +223,7 @@ PARSER_END(QueryParser)
|
||||||
| <RANGEIN_END: "]"> : DEFAULT
|
| <RANGEIN_END: "]"> : DEFAULT
|
||||||
| <RANGEEX_END: "}"> : DEFAULT
|
| <RANGEEX_END: "}"> : DEFAULT
|
||||||
| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
|
| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
|
||||||
| <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
|
| <RANGE_GOOP: (~[ "\\", " ", "]", "}" ] | "\\" ~[])+ >
|
||||||
}
|
}
|
||||||
|
|
||||||
// * Query ::= ( Clause )*
|
// * Query ::= ( Clause )*
|
||||||
|
|
|
@ -721,7 +721,7 @@ private final int jjStopStringLiteralDfa_1(int pos, long active0){
|
||||||
if ((active0 & 0x10000000L) != 0L)
|
if ((active0 & 0x10000000L) != 0L)
|
||||||
{
|
{
|
||||||
jjmatchedKind = 32;
|
jjmatchedKind = 32;
|
||||||
return 6;
|
return 9;
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
default :
|
default :
|
||||||
|
@ -754,7 +754,7 @@ private int jjMoveStringLiteralDfa1_1(long active0){
|
||||||
{
|
{
|
||||||
case 79:
|
case 79:
|
||||||
if ((active0 & 0x10000000L) != 0L)
|
if ((active0 & 0x10000000L) != 0L)
|
||||||
return jjStartNfaWithStates_1(1, 28, 6);
|
return jjStartNfaWithStates_1(1, 28, 9);
|
||||||
break;
|
break;
|
||||||
default :
|
default :
|
||||||
break;
|
break;
|
||||||
|
@ -772,7 +772,7 @@ private int jjStartNfaWithStates_1(int pos, int kind, int state)
|
||||||
private int jjMoveNfa_1(int startState, int curPos)
|
private int jjMoveNfa_1(int startState, int curPos)
|
||||||
{
|
{
|
||||||
int startsAt = 0;
|
int startsAt = 0;
|
||||||
jjnewStateCnt = 7;
|
jjnewStateCnt = 9;
|
||||||
int i = 1;
|
int i = 1;
|
||||||
jjstateSet[0] = startState;
|
jjstateSet[0] = startState;
|
||||||
int kind = 0x7fffffff;
|
int kind = 0x7fffffff;
|
||||||
|
@ -792,7 +792,7 @@ private int jjMoveNfa_1(int startState, int curPos)
|
||||||
{
|
{
|
||||||
if (kind > 32)
|
if (kind > 32)
|
||||||
kind = 32;
|
kind = 32;
|
||||||
{ jjCheckNAdd(6); }
|
{ jjCheckNAddTwoStates(6, 7); }
|
||||||
}
|
}
|
||||||
if ((0x100002600L & l) != 0L)
|
if ((0x100002600L & l) != 0L)
|
||||||
{
|
{
|
||||||
|
@ -802,6 +802,14 @@ private int jjMoveNfa_1(int startState, int curPos)
|
||||||
else if (curChar == 34)
|
else if (curChar == 34)
|
||||||
{ jjCheckNAddTwoStates(2, 4); }
|
{ jjCheckNAddTwoStates(2, 4); }
|
||||||
break;
|
break;
|
||||||
|
case 9:
|
||||||
|
case 6:
|
||||||
|
if ((0xfffffffeffffffffL & l) == 0L)
|
||||||
|
break;
|
||||||
|
if (kind > 32)
|
||||||
|
kind = 32;
|
||||||
|
{ jjCheckNAddTwoStates(6, 7); }
|
||||||
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
if (curChar == 34)
|
if (curChar == 34)
|
||||||
{ jjCheckNAddTwoStates(2, 4); }
|
{ jjCheckNAddTwoStates(2, 4); }
|
||||||
|
@ -818,12 +826,10 @@ private int jjMoveNfa_1(int startState, int curPos)
|
||||||
if (curChar == 34 && kind > 31)
|
if (curChar == 34 && kind > 31)
|
||||||
kind = 31;
|
kind = 31;
|
||||||
break;
|
break;
|
||||||
case 6:
|
case 8:
|
||||||
if ((0xfffffffeffffffffL & l) == 0L)
|
|
||||||
break;
|
|
||||||
if (kind > 32)
|
if (kind > 32)
|
||||||
kind = 32;
|
kind = 32;
|
||||||
{ jjCheckNAdd(6); }
|
{ jjCheckNAddTwoStates(6, 7); }
|
||||||
break;
|
break;
|
||||||
default : break;
|
default : break;
|
||||||
}
|
}
|
||||||
|
@ -837,12 +843,24 @@ private int jjMoveNfa_1(int startState, int curPos)
|
||||||
switch(jjstateSet[--i])
|
switch(jjstateSet[--i])
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
case 6:
|
if ((0xdfffffffcfffffffL & l) != 0L)
|
||||||
if ((0xdfffffffdfffffffL & l) == 0L)
|
{
|
||||||
break;
|
if (kind > 32)
|
||||||
if (kind > 32)
|
kind = 32;
|
||||||
kind = 32;
|
{ jjCheckNAddTwoStates(6, 7); }
|
||||||
{ jjCheckNAdd(6); }
|
}
|
||||||
|
else if (curChar == 92)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 8;
|
||||||
|
break;
|
||||||
|
case 9:
|
||||||
|
if ((0xdfffffffcfffffffL & l) != 0L)
|
||||||
|
{
|
||||||
|
if (kind > 32)
|
||||||
|
kind = 32;
|
||||||
|
{ jjCheckNAddTwoStates(6, 7); }
|
||||||
|
}
|
||||||
|
else if (curChar == 92)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 8;
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
{ jjAddStates(29, 31); }
|
{ jjAddStates(29, 31); }
|
||||||
|
@ -851,6 +869,22 @@ private int jjMoveNfa_1(int startState, int curPos)
|
||||||
if (curChar == 92)
|
if (curChar == 92)
|
||||||
jjstateSet[jjnewStateCnt++] = 3;
|
jjstateSet[jjnewStateCnt++] = 3;
|
||||||
break;
|
break;
|
||||||
|
case 6:
|
||||||
|
if ((0xdfffffffcfffffffL & l) == 0L)
|
||||||
|
break;
|
||||||
|
if (kind > 32)
|
||||||
|
kind = 32;
|
||||||
|
{ jjCheckNAddTwoStates(6, 7); }
|
||||||
|
break;
|
||||||
|
case 7:
|
||||||
|
if (curChar == 92)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 8;
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
if (kind > 32)
|
||||||
|
kind = 32;
|
||||||
|
{ jjCheckNAddTwoStates(6, 7); }
|
||||||
|
break;
|
||||||
default : break;
|
default : break;
|
||||||
}
|
}
|
||||||
} while(i != startsAt);
|
} while(i != startsAt);
|
||||||
|
@ -876,19 +910,21 @@ private int jjMoveNfa_1(int startState, int curPos)
|
||||||
{
|
{
|
||||||
if (kind > 32)
|
if (kind > 32)
|
||||||
kind = 32;
|
kind = 32;
|
||||||
{ jjCheckNAdd(6); }
|
{ jjCheckNAddTwoStates(6, 7); }
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 9:
|
||||||
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
|
|
||||||
{ jjAddStates(29, 31); }
|
|
||||||
break;
|
|
||||||
case 6:
|
case 6:
|
||||||
|
case 8:
|
||||||
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
|
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
|
||||||
break;
|
break;
|
||||||
if (kind > 32)
|
if (kind > 32)
|
||||||
kind = 32;
|
kind = 32;
|
||||||
{ jjCheckNAdd(6); }
|
{ jjCheckNAddTwoStates(6, 7); }
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
|
||||||
|
{ jjAddStates(29, 31); }
|
||||||
break;
|
break;
|
||||||
default : if (i1 == 0 || l1 == 0 || i2 == 0 || l2 == 0) break; else break;
|
default : if (i1 == 0 || l1 == 0 || i2 == 0 || l2 == 0) break; else break;
|
||||||
}
|
}
|
||||||
|
@ -901,7 +937,7 @@ private int jjMoveNfa_1(int startState, int curPos)
|
||||||
kind = 0x7fffffff;
|
kind = 0x7fffffff;
|
||||||
}
|
}
|
||||||
++curPos;
|
++curPos;
|
||||||
if ((i = jjnewStateCnt) == (startsAt = 7 - (jjnewStateCnt = startsAt)))
|
if ((i = jjnewStateCnt) == (startsAt = 9 - (jjnewStateCnt = startsAt)))
|
||||||
return curPos;
|
return curPos;
|
||||||
try { curChar = input_stream.readChar(); }
|
try { curChar = input_stream.readChar(); }
|
||||||
catch(java.io.IOException e) { return curPos; }
|
catch(java.io.IOException e) { return curPos; }
|
||||||
|
|
|
@ -1020,6 +1020,114 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
||||||
// iw.addDocument(d);
|
// iw.addDocument(d);
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
public void testRangeQueryWithEscapedClosingBrackets() throws Exception {
|
||||||
|
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||||
|
|
||||||
|
// Escaped brackets are parsed
|
||||||
|
assertQueryEquals("[\\] TO def]", a, "[] TO def]");
|
||||||
|
assertQueryEquals("[\\]abc TO def]", a, "[]abc TO def]");
|
||||||
|
assertQueryEquals("[a\\]bc TO def]", a, "[a]bc TO def]");
|
||||||
|
assertQueryEquals("[abc\\] TO def]", a, "[abc] TO def]");
|
||||||
|
assertQueryEquals("[abc TO \\]]", a, "[abc TO ]]");
|
||||||
|
assertQueryEquals("[abc TO \\]def]", a, "[abc TO ]def]");
|
||||||
|
assertQueryEquals("[abc TO d\\]ef]", a, "[abc TO d]ef]");
|
||||||
|
assertQueryEquals("[abc TO def\\]]", a, "[abc TO def]]");
|
||||||
|
|
||||||
|
assertQueryEquals("{\\} TO def}", a, "{} TO def}");
|
||||||
|
assertQueryEquals("{\\}abc TO def}", a, "{}abc TO def}");
|
||||||
|
assertQueryEquals("{a\\}bc TO def}", a, "{a}bc TO def}");
|
||||||
|
assertQueryEquals("{abc\\} TO def}", a, "{abc} TO def}");
|
||||||
|
assertQueryEquals("{abc TO \\}}", a, "{abc TO }}");
|
||||||
|
assertQueryEquals("{abc TO \\}def}", a, "{abc TO }def}");
|
||||||
|
assertQueryEquals("{abc TO d\\}ef}", a, "{abc TO d}ef}");
|
||||||
|
assertQueryEquals("{abc TO def\\}}", a, "{abc TO def}}");
|
||||||
|
|
||||||
|
assertQueryEquals("[\\[\\] TO \\[\\]]", a, "[[] TO []]");
|
||||||
|
assertQueryEquals("[\\{\\} TO \\{\\}]", a, "[{} TO {}]");
|
||||||
|
assertQueryEquals("{\\[\\] TO \\[\\]}", a, "{[] TO []}");
|
||||||
|
assertQueryEquals("{\\{\\} TO \\{\\}}", a, "{{} TO {}}");
|
||||||
|
|
||||||
|
assertQueryEquals(
|
||||||
|
"[ 2024-01-01T01:01:01+01:00\\[Europe/Warsaw\\] TO 2025-01-01T01:01:01+01:00\\[Europe/Warsaw\\] ]",
|
||||||
|
a,
|
||||||
|
"[2024-01-01T01:01:01+01:00[Europe/Warsaw] TO 2025-01-01T01:01:01+01:00[Europe/Warsaw]]");
|
||||||
|
|
||||||
|
// Escaped brackets are parsed in quoted terms
|
||||||
|
assertQueryEquals("[\"a\\[1\\]\" TO \"b\\[2\\]\"]", a, "[a[1] TO b[2]]");
|
||||||
|
assertQueryEquals("{\"a\\{1\\}\" TO \"b\\{2\\}\"}", a, "{a{1} TO b{2}}");
|
||||||
|
|
||||||
|
// Unescaped closing brackets in a term should throw an exception
|
||||||
|
assertParseException("[] TO def]");
|
||||||
|
assertParseException("[]abc TO def]");
|
||||||
|
assertParseException("[abc] TO def]");
|
||||||
|
assertParseException("[abc TO ]]");
|
||||||
|
assertParseException("[abc TO ]def]");
|
||||||
|
assertParseException("[abc TO def]]");
|
||||||
|
|
||||||
|
assertParseException("{} TO def}");
|
||||||
|
assertParseException("{}abc TO def}");
|
||||||
|
assertParseException("{abc} TO def}");
|
||||||
|
assertParseException("{abc TO }}");
|
||||||
|
assertParseException("{abc TO }def}");
|
||||||
|
assertParseException("{abc TO def}}");
|
||||||
|
|
||||||
|
// Escaped brackets should not work as range query wrappers
|
||||||
|
assertParseException("\\[abc TO def]");
|
||||||
|
assertParseException("\\{abc TO def}");
|
||||||
|
assertParseException("[abc TO def\\]");
|
||||||
|
assertParseException("{abc TO def\\}");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRangeQueryWithEscapedSpaces() throws Exception {
|
||||||
|
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||||
|
|
||||||
|
// Escaped spaces are parsed
|
||||||
|
assertQueryEquals("[\\ TO \\ ]", a, "[ TO ]");
|
||||||
|
assertQueryEquals("[\\ \\ TO \\ \\ ]", a, "[ TO ]");
|
||||||
|
assertQueryEquals("[\\ TO def]", a, "[ TO def]");
|
||||||
|
assertQueryEquals("[\\ abc TO def]", a, "[ abc TO def]");
|
||||||
|
assertQueryEquals("[a\\ bc TO def]", a, "[a bc TO def]");
|
||||||
|
assertQueryEquals("[abc\\ TO def]", a, "[abc TO def]");
|
||||||
|
assertQueryEquals("[abc TO \\ ]", a, "[abc TO ]");
|
||||||
|
assertQueryEquals("[abc TO \\ def]", a, "[abc TO def]");
|
||||||
|
assertQueryEquals("[abc TO d\\ ef]", a, "[abc TO d ef]");
|
||||||
|
assertQueryEquals("[abc TO def\\ ]", a, "[abc TO def ]");
|
||||||
|
|
||||||
|
// Escaped spaces are parsed in quoted terms
|
||||||
|
assertQueryEquals("[\"a\\ 1\" TO \"b\\ 2\"]", a, "[a 1 TO b 2]");
|
||||||
|
|
||||||
|
// Escaped spaces should not work as breaks around TO
|
||||||
|
assertParseException("[a\\ TO b]");
|
||||||
|
assertParseException("[a TO\\ b]");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRangeQueryWithMultipleEscapes() throws Exception {
|
||||||
|
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||||
|
|
||||||
|
// Double escapes are parsed as a literal backslash
|
||||||
|
assertQueryEquals("[\\\\ TO def]", a, "[\\ TO def]");
|
||||||
|
assertQueryEquals("[\\\\abc TO def]", a, "[\\abc TO def]");
|
||||||
|
assertQueryEquals("[a\\\\bc TO def]", a, "[a\\bc TO def]");
|
||||||
|
assertQueryEquals("[abc\\\\ TO def]", a, "[abc\\ TO def]");
|
||||||
|
assertQueryEquals("[abc TO \\\\]", a, "[abc TO \\]");
|
||||||
|
assertQueryEquals("[abc TO \\\\def]", a, "[abc TO \\def]");
|
||||||
|
assertQueryEquals("[abc TO d\\\\ef]", a, "[abc TO d\\ef]");
|
||||||
|
assertQueryEquals("[abc TO def\\\\]", a, "[abc TO def\\]");
|
||||||
|
|
||||||
|
// Three escapes are parsed as a literal backslash then an escaped next char
|
||||||
|
assertQueryEquals("[a\\\\\\]c TO def]", a, "[a\\]c TO def]");
|
||||||
|
assertQueryEquals("[abc\\\\\\* TO def]", a, "[abc\\* TO def]");
|
||||||
|
assertQueryEquals("[abc\\\\\\ xyz TO def]", a, "[abc\\ xyz TO def]");
|
||||||
|
|
||||||
|
// Four escapes are parsed as two literal backslashes
|
||||||
|
assertQueryEquals("[abc TO \\\\\\\\def]", a, "[abc TO \\\\def]");
|
||||||
|
|
||||||
|
assertQueryEquals(
|
||||||
|
"[c\\:\\\\temp\\\\\\~foo0.txt TO c\\:\\\\temp\\\\\\~foo9.txt]",
|
||||||
|
a,
|
||||||
|
"[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]");
|
||||||
|
}
|
||||||
|
|
||||||
public void testParsesBracketsIfQuoted() throws Exception {
|
public void testParsesBracketsIfQuoted() throws Exception {
|
||||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
|
||||||
|
|
||||||
|
@ -1037,10 +1145,6 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
||||||
"[ \"2024-01-01T01:01:01+01:00[Europe/Warsaw]\" TO \"2025-01-01T01:01:01+01:00[Europe/Warsaw]\" ]",
|
"[ \"2024-01-01T01:01:01+01:00[Europe/Warsaw]\" TO \"2025-01-01T01:01:01+01:00[Europe/Warsaw]\" ]",
|
||||||
null,
|
null,
|
||||||
"[2024-01-01t01:01:01+01:00[europe/warsaw] TO 2025-01-01t01:01:01+01:00[europe/warsaw]]");
|
"[2024-01-01t01:01:01+01:00[europe/warsaw] TO 2025-01-01t01:01:01+01:00[europe/warsaw]]");
|
||||||
|
|
||||||
// If the range terms aren't wrapped in quotes, a closing bracket will throw
|
|
||||||
assertParseException("[a[i] TO b[i]]");
|
|
||||||
assertParseException("[a\\[i\\] TO b\\[i\\]]");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract void testStarParsing() throws Exception;
|
public abstract void testStarParsing() throws Exception;
|
||||||
|
|
Loading…
Reference in New Issue