From 57cf17d188e5172548f38bfb1a098e7410626e2f Mon Sep 17 00:00:00 2001 From: Michael Busch Date: Thu, 8 Mar 2007 04:18:56 +0000 Subject: [PATCH] LUCENE-372: QueryParser.parse() now ensures that the entire input string is consumed. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@515914 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 4 ++ .../lucene/queryParser/QueryParser.java | 12 ++++- .../apache/lucene/queryParser/QueryParser.jj | 15 +++++- .../TestMultiFieldQueryParser.java | 2 +- .../lucene/queryParser/TestQueryParser.java | 50 +++++++++---------- 5 files changed, 53 insertions(+), 30 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 7794c2d2ca1..52ba4ac60bb 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -53,6 +53,10 @@ Bug fixes list to contain all possible characters, because every character that follows a backslash should be considered as escaped. (Michael Busch) + 8. LUCENE-372: QueryParser.parse() now ensures that the entire input string + is consumed. Now a ParseException is thrown if a query contains too many + closing parentheses. (Andreas Neumann via Michael Busch) + New features 1. LUCENE-759: Added two n-gram-producing TokenFilters. diff --git a/src/java/org/apache/lucene/queryParser/QueryParser.java b/src/java/org/apache/lucene/queryParser/QueryParser.java index 94cbbc931ab..14b2611d4e2 100644 --- a/src/java/org/apache/lucene/queryParser/QueryParser.java +++ b/src/java/org/apache/lucene/queryParser/QueryParser.java @@ -142,7 +142,8 @@ public class QueryParser implements QueryParserConstants { public Query parse(String query) throws ParseException { ReInit(new FastCharStream(new StringReader(query))); try { - return Query(field); + // TopLevelQuery is a Query followed by the end-of-input (EOF) + return TopLevelQuery(field); } catch (ParseException tme) { // rethrow to include the original query: @@ -884,6 +885,15 @@ public class QueryParser implements QueryParserConstants { throw new Error("Missing return statement in function"); } +// This makes sure that there is no garbage after the query string + final public Query TopLevelQuery(String field) throws ParseException { + Query q; + q = Query(field); + jj_consume_token(0); + {if (true) return q;} + throw new Error("Missing return statement in function"); + } + final public Query Query(String field) throws ParseException { Vector clauses = new Vector(); Query q, firstQuery=null; diff --git a/src/java/org/apache/lucene/queryParser/QueryParser.jj b/src/java/org/apache/lucene/queryParser/QueryParser.jj index c1a29248434..cb20a53df24 100644 --- a/src/java/org/apache/lucene/queryParser/QueryParser.jj +++ b/src/java/org/apache/lucene/queryParser/QueryParser.jj @@ -166,7 +166,8 @@ public class QueryParser { public Query parse(String query) throws ParseException { ReInit(new FastCharStream(new StringReader(query))); try { - return Query(field); + // TopLevelQuery is a Query followed by the end-of-input (EOF) + return TopLevelQuery(field); } catch (ParseException tme) { // rethrow to include the original query: @@ -931,6 +932,18 @@ int Modifiers() : { { return ret; } } +// This makes sure that there is no garbage after the query string +Query TopLevelQuery(String field) : +{ + Query q; +} +{ + q=Query(field) + { + return q; + } +} + Query Query(String field) : { Vector clauses = new Vector(); diff --git a/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java b/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java index f087efa0752..373476f0c18 100644 --- a/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java +++ b/src/test/org/apache/lucene/queryParser/TestMultiFieldQueryParser.java @@ -55,7 +55,7 @@ public class TestMultiFieldQueryParser extends TestCase { q = mfqp.parse("+one +two"); assertEquals("+(b:one t:one) +(b:two t:two)", q.toString()); - q = mfqp.parse("+one -two -three)"); + q = mfqp.parse("+one -two -three"); assertEquals("+(b:one t:one) -(b:two t:two) -(b:three t:three)", q.toString()); q = mfqp.parse("one^2 two"); diff --git a/src/test/org/apache/lucene/queryParser/TestQueryParser.java b/src/test/org/apache/lucene/queryParser/TestQueryParser.java index aa4ecec5215..a019771d7a1 100644 --- a/src/test/org/apache/lucene/queryParser/TestQueryParser.java +++ b/src/test/org/apache/lucene/queryParser/TestQueryParser.java @@ -298,12 +298,9 @@ public class TestQueryParser extends TestCase { fq = (FuzzyQuery)getQuery("term~", null); assertEquals(0.5f, fq.getMinSimilarity(), 0.1f); assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); - try { - getQuery("term~1.1", null); // value > 1, throws exception - fail(); - } catch(ParseException pe) { - // expected exception - } + + assertParseException("term~1.1"); // value > 1, throws exception + assertTrue(getQuery("term*germ", null) instanceof WildcardQuery); /* Tests to see that wild card terms are (or are not) properly @@ -566,11 +563,7 @@ public class TestQueryParser extends TestCase { assertQueryEquals("c\\:\\\\temp\\\\\\~foo.txt", a, "c:\\temp\\~foo.txt"); - - try { - assertQueryEquals("XY\\", a, "XYZ"); - fail("ParseException expected, not thrown"); - } catch (ParseException expected) {} + assertParseException("XY\\"); // there must be a character after the escape char // test unicode escaping assertQueryEquals("a\\u0062c", a, "abc"); @@ -578,24 +571,16 @@ public class TestQueryParser extends TestCase { assertQueryEquals("XY\\u005A", a, "XYZ"); assertQueryEquals("\"a \\\\\\u0028\\u0062\\\" c\"", a, "\"a \\(b\" c\""); - try { - assertQueryEquals("XY\\u005G", a, "XYZ"); - fail("ParseException expected, not thrown"); - } catch (ParseException expected) {} - - try { - assertQueryEquals("XY\\u005", a, "XYZ"); - fail("ParseException expected, not thrown"); - } catch (ParseException expected) {} + assertParseException("XY\\u005G"); // test non-hex character in escaped unicode sequence + assertParseException("XY\\u005"); // test incomplete escaped unicode sequence // Tests bug LUCENE-800 assertQueryEquals("(item:\\\\ item:ABCD\\\\)", a, "item:\\ item:ABCD\\"); + assertParseException("(item:\\\\ item:ABCD\\\\))"); // unmatched closing paranthesis assertQueryEquals("\\*", a, "*"); assertQueryEquals("\\\\", a, "\\"); // escaped backslash - try { - assertQueryEquals("\\", a, "\\"); - fail("ParseException expected not thrown (backslash must be escaped)"); - } catch (ParseException expected) {} + + assertParseException("\\"); // a backslash must always be escaped } public void testQueryStringEscaping() throws Exception { @@ -701,13 +686,24 @@ public class TestQueryParser extends TestCase { assertEquals(1.0f, q.getBoost(), 0.01f); } - public void testException() throws Exception { + public void assertParseException(String queryString) throws Exception { try { - assertQueryEquals("\"some phrase", null, "abc"); - fail("ParseException expected, not thrown"); + Query q = getQuery(queryString, null); } catch (ParseException expected) { + return; } + fail("ParseException expected, not thrown"); } + + public void testException() throws Exception { + assertParseException("\"some phrase"); + assertParseException("(foo bar"); + assertParseException("foo bar))"); + assertParseException("field:term:with:colon some more terms"); + assertParseException("(sub query)^5.0^2.0 plus more"); + assertParseException("secret AND illegal) AND access:confidential"); + } + public void testCustomQueryParserWildcard() { try {