Parse escaped brackets and spaces in range queries

This commit is contained in:
Ben Chaplin 2024-10-11 01:18:29 -04:00
parent 4f8035c013
commit eb7b1e4bbd
4 changed files with 168 additions and 28 deletions

View File

@ -1,9 +1,9 @@
{ {
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java": "7a8a8fd5b2ea78f9a17f54cbae8b0e4496e8988e", "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java": "7a8a8fd5b2ea78f9a17f54cbae8b0e4496e8988e",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java": "a2b7d21092d21cbac290cb1ddde5ac161824fb83", "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java": "a2b7d21092d21cbac290cb1ddde5ac161824fb83",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj": "ed9f248e1a48cadeeab8f0a79e77e986e34ff721", "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj": "64bc53b09c0665afd281f1d85a2db802e5f75266",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserConstants.java": "e59a3fd38b66a3d56779c55955c1e014225a1f50", "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserConstants.java": "e59a3fd38b66a3d56779c55955c1e014225a1f50",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java": "dc99a1083bfa50e429d40e114fabe7dd5d434693", "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java": "b83ebcc6c97618f1986573cb294d6180f1943f77",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java": "310665ba37d982327fcb55cc3523d629ef29ef54", "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java": "310665ba37d982327fcb55cc3523d629ef29ef54",
"lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java": "7e2dd6ab7489048bb70f3077ca9fed90f925ec33" "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java": "7e2dd6ab7489048bb70f3077ca9fed90f925ec33"
} }

View File

@ -223,7 +223,7 @@ PARSER_END(QueryParser)
| <RANGEIN_END: "]"> : DEFAULT | <RANGEIN_END: "]"> : DEFAULT
| <RANGEEX_END: "}"> : DEFAULT | <RANGEEX_END: "}"> : DEFAULT
| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\""> | <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
| <RANGE_GOOP: (~[ " ", "]", "}" ])+ > | <RANGE_GOOP: (~[ "\\", " ", "]", "}" ] | "\\" ~[])+ >
} }
// * Query ::= ( Clause )* // * Query ::= ( Clause )*

View File

@ -721,7 +721,7 @@ private final int jjStopStringLiteralDfa_1(int pos, long active0){
if ((active0 & 0x10000000L) != 0L) if ((active0 & 0x10000000L) != 0L)
{ {
jjmatchedKind = 32; jjmatchedKind = 32;
return 6; return 9;
} }
return -1; return -1;
default : default :
@ -754,7 +754,7 @@ private int jjMoveStringLiteralDfa1_1(long active0){
{ {
case 79: case 79:
if ((active0 & 0x10000000L) != 0L) if ((active0 & 0x10000000L) != 0L)
return jjStartNfaWithStates_1(1, 28, 6); return jjStartNfaWithStates_1(1, 28, 9);
break; break;
default : default :
break; break;
@ -772,7 +772,7 @@ private int jjStartNfaWithStates_1(int pos, int kind, int state)
private int jjMoveNfa_1(int startState, int curPos) private int jjMoveNfa_1(int startState, int curPos)
{ {
int startsAt = 0; int startsAt = 0;
jjnewStateCnt = 7; jjnewStateCnt = 9;
int i = 1; int i = 1;
jjstateSet[0] = startState; jjstateSet[0] = startState;
int kind = 0x7fffffff; int kind = 0x7fffffff;
@ -792,7 +792,7 @@ private int jjMoveNfa_1(int startState, int curPos)
{ {
if (kind > 32) if (kind > 32)
kind = 32; kind = 32;
{ jjCheckNAdd(6); } { jjCheckNAddTwoStates(6, 7); }
} }
if ((0x100002600L & l) != 0L) if ((0x100002600L & l) != 0L)
{ {
@ -802,6 +802,14 @@ private int jjMoveNfa_1(int startState, int curPos)
else if (curChar == 34) else if (curChar == 34)
{ jjCheckNAddTwoStates(2, 4); } { jjCheckNAddTwoStates(2, 4); }
break; break;
case 9:
case 6:
if ((0xfffffffeffffffffL & l) == 0L)
break;
if (kind > 32)
kind = 32;
{ jjCheckNAddTwoStates(6, 7); }
break;
case 1: case 1:
if (curChar == 34) if (curChar == 34)
{ jjCheckNAddTwoStates(2, 4); } { jjCheckNAddTwoStates(2, 4); }
@ -818,12 +826,10 @@ private int jjMoveNfa_1(int startState, int curPos)
if (curChar == 34 && kind > 31) if (curChar == 34 && kind > 31)
kind = 31; kind = 31;
break; break;
case 6: case 8:
if ((0xfffffffeffffffffL & l) == 0L)
break;
if (kind > 32) if (kind > 32)
kind = 32; kind = 32;
{ jjCheckNAdd(6); } { jjCheckNAddTwoStates(6, 7); }
break; break;
default : break; default : break;
} }
@ -837,12 +843,24 @@ private int jjMoveNfa_1(int startState, int curPos)
switch(jjstateSet[--i]) switch(jjstateSet[--i])
{ {
case 0: case 0:
case 6: if ((0xdfffffffcfffffffL & l) != 0L)
if ((0xdfffffffdfffffffL & l) == 0L) {
break;
if (kind > 32) if (kind > 32)
kind = 32; kind = 32;
{ jjCheckNAdd(6); } { jjCheckNAddTwoStates(6, 7); }
}
else if (curChar == 92)
jjstateSet[jjnewStateCnt++] = 8;
break;
case 9:
if ((0xdfffffffcfffffffL & l) != 0L)
{
if (kind > 32)
kind = 32;
{ jjCheckNAddTwoStates(6, 7); }
}
else if (curChar == 92)
jjstateSet[jjnewStateCnt++] = 8;
break; break;
case 2: case 2:
{ jjAddStates(29, 31); } { jjAddStates(29, 31); }
@ -851,6 +869,22 @@ private int jjMoveNfa_1(int startState, int curPos)
if (curChar == 92) if (curChar == 92)
jjstateSet[jjnewStateCnt++] = 3; jjstateSet[jjnewStateCnt++] = 3;
break; break;
case 6:
if ((0xdfffffffcfffffffL & l) == 0L)
break;
if (kind > 32)
kind = 32;
{ jjCheckNAddTwoStates(6, 7); }
break;
case 7:
if (curChar == 92)
jjstateSet[jjnewStateCnt++] = 8;
break;
case 8:
if (kind > 32)
kind = 32;
{ jjCheckNAddTwoStates(6, 7); }
break;
default : break; default : break;
} }
} while(i != startsAt); } while(i != startsAt);
@ -876,19 +910,21 @@ private int jjMoveNfa_1(int startState, int curPos)
{ {
if (kind > 32) if (kind > 32)
kind = 32; kind = 32;
{ jjCheckNAdd(6); } { jjCheckNAddTwoStates(6, 7); }
} }
break; break;
case 2: case 9:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
{ jjAddStates(29, 31); }
break;
case 6: case 6:
case 8:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break; break;
if (kind > 32) if (kind > 32)
kind = 32; kind = 32;
{ jjCheckNAdd(6); } { jjCheckNAddTwoStates(6, 7); }
break;
case 2:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
{ jjAddStates(29, 31); }
break; break;
default : if (i1 == 0 || l1 == 0 || i2 == 0 || l2 == 0) break; else break; default : if (i1 == 0 || l1 == 0 || i2 == 0 || l2 == 0) break; else break;
} }
@ -901,7 +937,7 @@ private int jjMoveNfa_1(int startState, int curPos)
kind = 0x7fffffff; kind = 0x7fffffff;
} }
++curPos; ++curPos;
if ((i = jjnewStateCnt) == (startsAt = 7 - (jjnewStateCnt = startsAt))) if ((i = jjnewStateCnt) == (startsAt = 9 - (jjnewStateCnt = startsAt)))
return curPos; return curPos;
try { curChar = input_stream.readChar(); } try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return curPos; } catch(java.io.IOException e) { return curPos; }

View File

@ -1020,6 +1020,114 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
// iw.addDocument(d); // iw.addDocument(d);
// } // }
/**
 * Exercises range queries whose terms contain bracket characters.
 *
 * <p>Backslash-escaped brackets inside a range term (bare or quoted) are handed to
 * {@code assertQueryEquals} together with the expected string form in which the brackets appear
 * literally. Unescaped closing brackets inside a term, and escaped brackets used as the range
 * delimiters themselves, are handed to {@code assertParseException}.
 */
public void testRangeQueryWithEscapedClosingBrackets() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);

  // Escaped brackets are parsed. Each row is {query text, expected string form}.
  String[][] escapedBrackets = {
    {"[\\] TO def]", "[] TO def]"},
    {"[\\]abc TO def]", "[]abc TO def]"},
    {"[a\\]bc TO def]", "[a]bc TO def]"},
    {"[abc\\] TO def]", "[abc] TO def]"},
    {"[abc TO \\]]", "[abc TO ]]"},
    {"[abc TO \\]def]", "[abc TO ]def]"},
    {"[abc TO d\\]ef]", "[abc TO d]ef]"},
    {"[abc TO def\\]]", "[abc TO def]]"},
    {"{\\} TO def}", "{} TO def}"},
    {"{\\}abc TO def}", "{}abc TO def}"},
    {"{a\\}bc TO def}", "{a}bc TO def}"},
    {"{abc\\} TO def}", "{abc} TO def}"},
    {"{abc TO \\}}", "{abc TO }}"},
    {"{abc TO \\}def}", "{abc TO }def}"},
    {"{abc TO d\\}ef}", "{abc TO d}ef}"},
    {"{abc TO def\\}}", "{abc TO def}}"},
    {"[\\[\\] TO \\[\\]]", "[[] TO []]"},
    {"[\\{\\} TO \\{\\}]", "[{} TO {}]"},
    {"{\\[\\] TO \\[\\]}", "{[] TO []}"},
    {"{\\{\\} TO \\{\\}}", "{{} TO {}}"},
    {
      "[ 2024-01-01T01:01:01+01:00\\[Europe/Warsaw\\] TO 2025-01-01T01:01:01+01:00\\[Europe/Warsaw\\] ]",
      "[2024-01-01T01:01:01+01:00[Europe/Warsaw] TO 2025-01-01T01:01:01+01:00[Europe/Warsaw]]"
    },
    // Escaped brackets are parsed in quoted terms
    {"[\"a\\[1\\]\" TO \"b\\[2\\]\"]", "[a[1] TO b[2]]"},
    {"{\"a\\{1\\}\" TO \"b\\{2\\}\"}", "{a{1} TO b{2}}"},
  };
  for (String[] row : escapedBrackets) {
    assertQueryEquals(row[0], analyzer, row[1]);
  }

  String[] rejected = {
    // Unescaped closing brackets in a term should throw an exception
    "[] TO def]",
    "[]abc TO def]",
    "[abc] TO def]",
    "[abc TO ]]",
    "[abc TO ]def]",
    "[abc TO def]]",
    "{} TO def}",
    "{}abc TO def}",
    "{abc} TO def}",
    "{abc TO }}",
    "{abc TO }def}",
    "{abc TO def}}",
    // Escaped brackets should not work as range query wrappers
    "\\[abc TO def]",
    "\\{abc TO def}",
    "[abc TO def\\]",
    "{abc TO def\\}",
  };
  for (String query : rejected) {
    assertParseException(query);
  }
}
/**
 * Exercises range queries whose terms contain backslash-escaped spaces.
 *
 * <p>Inputs with an escaped space inside a bare or quoted range term are passed to
 * {@code assertQueryEquals} with the expected string form; inputs where an escaped space sits
 * where the separator around {@code TO} is needed are passed to {@code assertParseException}.
 *
 * <p>NOTE(review): in this copy the success case {@code "[abc\\ TO def]"} and the failure case
 * {@code "[a\\ TO b]"} have the same shape, and the first two success cases share one expected
 * string. That suggests consecutive spaces inside the literals were collapsed somewhere along the
 * way -- confirm the literals against the repository before relying on them.
 */
public void testRangeQueryWithEscapedSpaces() throws Exception {
// Whitespace tokenizer; the third argument (false) is presumably the lowercase flag -- confirm.
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
// Escaped spaces are parsed
assertQueryEquals("[\\ TO \\ ]", a, "[ TO ]");
assertQueryEquals("[\\ \\ TO \\ \\ ]", a, "[ TO ]");
assertQueryEquals("[\\ TO def]", a, "[ TO def]");
assertQueryEquals("[\\ abc TO def]", a, "[ abc TO def]");
assertQueryEquals("[a\\ bc TO def]", a, "[a bc TO def]");
assertQueryEquals("[abc\\ TO def]", a, "[abc TO def]");
assertQueryEquals("[abc TO \\ ]", a, "[abc TO ]");
assertQueryEquals("[abc TO \\ def]", a, "[abc TO def]");
assertQueryEquals("[abc TO d\\ ef]", a, "[abc TO d ef]");
assertQueryEquals("[abc TO def\\ ]", a, "[abc TO def ]");
// Escaped spaces are parsed in quoted terms
assertQueryEquals("[\"a\\ 1\" TO \"b\\ 2\"]", a, "[a 1 TO b 2]");
// Escaped spaces should not work as breaks around TO
assertParseException("[a\\ TO b]");
assertParseException("[a TO\\ b]");
}
/**
 * Exercises range queries whose terms contain runs of two, three, or four backslashes.
 *
 * <p>Every case is a pair of query text and expected string form, passed in order to
 * {@code assertQueryEquals} with a whitespace {@code MockAnalyzer}.
 */
public void testRangeQueryWithMultipleEscapes() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);

  String[] queries = {
    // Double escapes are parsed as a literal backslash
    "[\\\\ TO def]",
    "[\\\\abc TO def]",
    "[a\\\\bc TO def]",
    "[abc\\\\ TO def]",
    "[abc TO \\\\]",
    "[abc TO \\\\def]",
    "[abc TO d\\\\ef]",
    "[abc TO def\\\\]",
    // Three escapes are parsed as a literal backslash then an escaped next char
    "[a\\\\\\]c TO def]",
    "[abc\\\\\\* TO def]",
    "[abc\\\\\\ xyz TO def]",
    // Four escapes are parsed as two literal backslashes
    "[abc TO \\\\\\\\def]",
    "[c\\:\\\\temp\\\\\\~foo0.txt TO c\\:\\\\temp\\\\\\~foo9.txt]",
  };
  // expected[i] is the string form expected for queries[i].
  String[] expected = {
    "[\\ TO def]",
    "[\\abc TO def]",
    "[a\\bc TO def]",
    "[abc\\ TO def]",
    "[abc TO \\]",
    "[abc TO \\def]",
    "[abc TO d\\ef]",
    "[abc TO def\\]",
    "[a\\]c TO def]",
    "[abc\\* TO def]",
    "[abc\\ xyz TO def]",
    "[abc TO \\\\def]",
    "[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]",
  };

  for (int idx = 0; idx < queries.length; idx++) {
    assertQueryEquals(queries[idx], analyzer, expected[idx]);
  }
}
public void testParsesBracketsIfQuoted() throws Exception { public void testParsesBracketsIfQuoted() throws Exception {
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
@ -1037,10 +1145,6 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
"[ \"2024-01-01T01:01:01+01:00[Europe/Warsaw]\" TO \"2025-01-01T01:01:01+01:00[Europe/Warsaw]\" ]", "[ \"2024-01-01T01:01:01+01:00[Europe/Warsaw]\" TO \"2025-01-01T01:01:01+01:00[Europe/Warsaw]\" ]",
null, null,
"[2024-01-01t01:01:01+01:00[europe/warsaw] TO 2025-01-01t01:01:01+01:00[europe/warsaw]]"); "[2024-01-01t01:01:01+01:00[europe/warsaw] TO 2025-01-01t01:01:01+01:00[europe/warsaw]]");
// If the range terms aren't wrapped in quotes, a closing bracket will throw
assertParseException("[a[i] TO b[i]]");
assertParseException("[a\\[i\\] TO b\\[i\\]]");
} }
public abstract void testStarParsing() throws Exception; public abstract void testStarParsing() throws Exception;