From cf06b516bc0242e4b37f420a585d818f0d9ffbbb Mon Sep 17 00:00:00 2001 From: Otis Gospodnetic Date: Wed, 3 Mar 2004 11:24:49 +0000 Subject: [PATCH] - Applied the patch that fixes query string escaping: http://issues.apache.org/bugzilla/show_bug.cgi?id=24665 Contributed by Jean-Francois Halleux git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150221 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 4 + .../lucene/queryParser/QueryParser.java | 53 +++++-- .../apache/lucene/queryParser/QueryParser.jj | 144 +++++++++--------- .../lucene/queryParser/TestQueryParser.java | 92 +++++------ 4 files changed, 154 insertions(+), 139 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 33db38cf76b..d30cc507924 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -57,6 +57,10 @@ $Id$ 10. Added support for term vectors. See Field#isTermVectorStored(). (Grant Ingersoll, Cutting & Dmitry) +11. Fixed the old bug with escaping of special characters in query + strings: http://issues.apache.org/bugzilla/show_bug.cgi?id=24665 + (Jean-Francois Halleux via Otis) + 1.3 final diff --git a/src/java/org/apache/lucene/queryParser/QueryParser.java b/src/java/org/apache/lucene/queryParser/QueryParser.java index 3984e88f1a7..0a5a87b08e0 100644 --- a/src/java/org/apache/lucene/queryParser/QueryParser.java +++ b/src/java/org/apache/lucene/queryParser/QueryParser.java @@ -132,7 +132,7 @@ public class QueryParser implements QueryParserConstants { /** * Sets the boolean operator of the QueryParser. - * In classic mode (DEFAULT_OPERATOR_OR) terms without any modifiers + * In classic mode (DEFAULT_OPERATOR_OR) terms without any modifiers * are considered optional: for example capital of Hungary is equal to * capital OR of OR Hungary.
* In DEFAULT_OPERATOR_AND terms are considered to be in conjuction: the @@ -386,6 +386,22 @@ public class QueryParser implements QueryParserConstants { return new FuzzyQuery(t); } + /** + * Returns a String where the escape char has been + * removed, or kept only once if there was a double escape. + */ + private String discardEscapeChar(String input) { + char[] caSource = input.toCharArray(); + char[] caDest = new char[caSource.length]; + int j = 0; + for (int i = 0; i < caSource.length; i++) { + if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) { + caDest[j++]=caSource[i]; + } + } + return new String(caDest, 0, j); + } + public static void main(String[] args) throws Exception { QueryParser qp = new QueryParser("field", new org.apache.lucene.analysis.SimpleAnalyzer()); @@ -506,7 +522,7 @@ public class QueryParser implements QueryParserConstants { if (jj_2_1(2)) { fieldToken = jj_consume_token(TERM); jj_consume_token(COLON); - field = fieldToken.image; + field=discardEscapeChar(fieldToken.image); } else { ; } @@ -609,15 +625,17 @@ public class QueryParser implements QueryParserConstants { jj_la1[10] = jj_gen; ; } + String termImage=discardEscapeChar(term.image); if (wildcard) { - q = getWildcardQuery(field, term.image); + q = getWildcardQuery(field, termImage); } else if (prefix) { - q = getPrefixQuery(field, term.image.substring - (0, term.image.length()-1)); + q = getPrefixQuery(field, + discardEscapeChar(term.image.substring + (0, term.image.length()-1))); } else if (fuzzy) { - q = getFuzzyQuery(field, term.image); + q = getFuzzyQuery(field, termImage); } else { - q = getFieldQuery(field, analyzer, term.image); + q = getFieldQuery(field, analyzer, termImage); } break; case RANGEIN_START: @@ -664,11 +682,16 @@ public class QueryParser implements QueryParserConstants { jj_la1[14] = jj_gen; ; } - if (goop1.kind == RANGEIN_QUOTED) + if (goop1.kind == RANGEIN_QUOTED) { goop1.image = goop1.image.substring(1, goop1.image.length()-1); - if (goop2.kind == RANGEIN_QUOTED) + } else { + goop1.image = discardEscapeChar(goop1.image); + } + if (goop2.kind == RANGEIN_QUOTED) { goop2.image = goop2.image.substring(1, goop2.image.length()-1); - + } else { + goop2.image = discardEscapeChar(goop2.image); + } q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); break; case RANGEEX_START: @@ -715,10 +738,16 @@ public class QueryParser implements QueryParserConstants { jj_la1[18] = jj_gen; ; } - if (goop1.kind == RANGEEX_QUOTED) + if (goop1.kind == RANGEEX_QUOTED) { goop1.image = goop1.image.substring(1, goop1.image.length()-1); - if (goop2.kind == RANGEEX_QUOTED) + } else { + goop1.image = discardEscapeChar(goop1.image); + } + if (goop2.kind == RANGEEX_QUOTED) { goop2.image = goop2.image.substring(1, goop2.image.length()-1); + } else { + goop2.image = discardEscapeChar(goop2.image); + } q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); break; diff --git a/src/java/org/apache/lucene/queryParser/QueryParser.jj b/src/java/org/apache/lucene/queryParser/QueryParser.jj index 9c6e8ebb0f4..bd13bc9c604 100644 --- a/src/java/org/apache/lucene/queryParser/QueryParser.jj +++ b/src/java/org/apache/lucene/queryParser/QueryParser.jj @@ -1,58 +1,19 @@ -/* ==================================================================== - * The Apache Software License, Version 1.1 +/** + * Copyright 2004 The Apache Software Foundation * - * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation. All - * rights reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: + * http://www.apache.org/licenses/LICENSE-2.0 * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ - options { STATIC=false; JAVA_UNICODE_ESCAPE=true; @@ -138,8 +99,8 @@ public class QueryParser { Locale locale = Locale.getDefault(); /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. - * @param query the query string to be parsed. - * @param field the default field for query terms. + * @param query the query string to be parsed. + * @param field the default field for query terms. * @param analyzer used to find terms in the query text. * @throws ParseException if the parsing fails */ @@ -150,7 +111,7 @@ public class QueryParser { } /** Constructs a query parser. - * @param f the default field for query terms. + * @param f the default field for query terms. * @param a used to find terms in the query text. */ public QueryParser(String f, Analyzer a) { @@ -161,7 +122,7 @@ public class QueryParser { /** Parses a query string, returning a * Query. - * @param query the query string to be parsed. + * @param query the query string to be parsed. * @throws ParseException if the parsing fails */ public Query parse(String query) throws ParseException { @@ -364,7 +325,7 @@ public class QueryParser { { BooleanQuery query = new BooleanQuery(); for (int i = 0; i < clauses.size(); i++) { - query.add((BooleanClause)clauses.elementAt(i)); + query.add((BooleanClause)clauses.elementAt(i)); } return query; } @@ -393,7 +354,7 @@ public class QueryParser { protected Query getWildcardQuery(String field, String termStr) throws ParseException { if (lowercaseWildcardTerms) { - termStr = termStr.toLowerCase(); + termStr = termStr.toLowerCase(); } Term t = new Term(field, termStr); return new WildcardQuery(t); @@ -425,7 +386,7 @@ public class QueryParser { protected Query getPrefixQuery(String field, String termStr) throws ParseException { if (lowercaseWildcardTerms) { - termStr = termStr.toLowerCase(); + termStr = termStr.toLowerCase(); } Term t = new Term(field, termStr); return new PrefixQuery(t); @@ -448,6 +409,22 @@ public class QueryParser { return new FuzzyQuery(t); } + /** + * Returns a String where the escape char has been + * removed, or kept only once if there was a double escape. + */ + private String discardEscapeChar(String input) { + char[] caSource = input.toCharArray(); + char[] caDest = new char[caSource.length]; + int j = 0; + for (int i = 0; i < caSource.length; i++) { + if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) { + caDest[j++]=caSource[i]; + } + } + return new String(caDest, 0, j); + } + public static void main(String[] args) throws Exception { QueryParser qp = new QueryParser("field", new org.apache.lucene.analysis.SimpleAnalyzer()); @@ -572,7 +549,7 @@ Query Query(String field) : if (clauses.size() == 1 && firstQuery != null) return firstQuery; else { - return getBooleanQuery(clauses); + return getBooleanQuery(clauses); } } } @@ -584,7 +561,9 @@ Query Clause(String field) : { { [ LOOKAHEAD(2) - fieldToken= { field = fieldToken.image; } + fieldToken= { + field=discardEscapeChar(fieldToken.image); + } ] ( @@ -594,11 +573,11 @@ Query Clause(String field) : { ) { if (boost != null) { - float f = (float)1.0; - try { - f = Float.valueOf(boost.image).floatValue(); + float f = (float)1.0; + try { + f = Float.valueOf(boost.image).floatValue(); q.setBoost(f); - } catch (Exception ignored) { } + } catch (Exception ignored) { } } return q; } @@ -624,15 +603,17 @@ Query Term(String field) : { [ { fuzzy=true; } ] [ boost= [ { fuzzy=true; } ] ] { + String termImage=discardEscapeChar(term.image); if (wildcard) { - q = getWildcardQuery(field, term.image); + q = getWildcardQuery(field, termImage); } else if (prefix) { - q = getPrefixQuery(field, term.image.substring - (0, term.image.length()-1)); + q = getPrefixQuery(field, + discardEscapeChar(term.image.substring + (0, term.image.length()-1))); } else if (fuzzy) { - q = getFuzzyQuery(field, term.image); + q = getFuzzyQuery(field, termImage); } else { - q = getFieldQuery(field, analyzer, term.image); + q = getFieldQuery(field, analyzer, termImage); } } | ( ( goop1=|goop1= ) @@ -640,11 +621,16 @@ Query Term(String field) : { ) [ boost= ] { - if (goop1.kind == RANGEIN_QUOTED) + if (goop1.kind == RANGEIN_QUOTED) { goop1.image = goop1.image.substring(1, goop1.image.length()-1); - if (goop2.kind == RANGEIN_QUOTED) + } else { + goop1.image = discardEscapeChar(goop1.image); + } + if (goop2.kind == RANGEIN_QUOTED) { goop2.image = goop2.image.substring(1, goop2.image.length()-1); - + } else { + goop2.image = discardEscapeChar(goop2.image); + } q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); } | ( ( goop1=|goop1= ) @@ -652,10 +638,16 @@ Query Term(String field) : { ) [ boost= ] { - if (goop1.kind == RANGEEX_QUOTED) + if (goop1.kind == RANGEEX_QUOTED) { goop1.image = goop1.image.substring(1, goop1.image.length()-1); - if (goop2.kind == RANGEEX_QUOTED) + } else { + goop1.image = discardEscapeChar(goop1.image); + } + if (goop2.kind == RANGEEX_QUOTED) { goop2.image = goop2.image.substring(1, goop2.image.length()-1); + } else { + goop2.image = discardEscapeChar(goop2.image); + } q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); } @@ -681,9 +673,9 @@ Query Term(String field) : { f = Float.valueOf(boost.image).floatValue(); } catch (Exception ignored) { - /* Should this be handled somehow? (defaults to "no boost", if - * boost number is invalid) - */ + /* Should this be handled somehow? (defaults to "no boost", if + * boost number is invalid) + */ } // avoid boosting null queries, such as those caused by stop words diff --git a/src/test/org/apache/lucene/queryParser/TestQueryParser.java b/src/test/org/apache/lucene/queryParser/TestQueryParser.java index 2cd6e9b6e76..f50dc640fe4 100644 --- a/src/test/org/apache/lucene/queryParser/TestQueryParser.java +++ b/src/test/org/apache/lucene/queryParser/TestQueryParser.java @@ -1,57 +1,19 @@ package org.apache.lucene.queryParser; -/* ==================================================================== - * The Apache Software License, Version 1.1 +/** + * Copyright 2002-2004 The Apache Software Foundation * - * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation. All - * rights reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: + * http://www.apache.org/licenses/LICENSE-2.0 * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ import junit.framework.TestCase; @@ -355,7 +317,7 @@ public class TestQueryParser extends TestCase { public void testEscaped() throws Exception { Analyzer a = new WhitespaceAnalyzer(); - assertQueryEquals("\\[brackets", a, "\\[brackets"); + /* assertQueryEquals("\\[brackets", a, "\\[brackets"); assertQueryEquals("\\[brackets", null, "brackets"); assertQueryEquals("\\\\", a, "\\\\"); assertQueryEquals("\\+blah", a, "\\+blah"); @@ -377,8 +339,36 @@ public class TestQueryParser extends TestCase { assertQueryEquals("\\?blah", a, "\\?blah"); assertQueryEquals("foo \\&& bar", a, "foo \\&& bar"); assertQueryEquals("foo \\|| bar", a, "foo \\|| bar"); - assertQueryEquals("foo \\AND bar", a, "foo \\AND bar"); + assertQueryEquals("foo \\AND bar", a, "foo \\AND bar"); */ + assertQueryEquals("a\\-b:c",a,"a-b:c"); + assertQueryEquals("a\\+b:c",a,"a+b:c"); + assertQueryEquals("a\\:b:c",a,"a:b:c"); + assertQueryEquals("a\\\\b:c",a,"a\\b:c"); + + assertQueryEquals("a:b\\-c",a,"a:b-c"); + assertQueryEquals("a:b\\+c",a,"a:b+c"); + assertQueryEquals("a:b\\:c",a,"a:b:c"); + assertQueryEquals("a:b\\\\c",a,"a:b\\c"); + + assertQueryEquals("a:b\\-c*",a,"a:b-c*"); + assertQueryEquals("a:b\\+c*",a,"a:b+c*"); + assertQueryEquals("a:b\\:c*",a,"a:b:c*"); + assertQueryEquals("a:b\\\\c*",a,"a:b\\c*"); + + assertQueryEquals("a:b\\-?c",a,"a:b-?c"); + assertQueryEquals("a:b\\+?c",a,"a:b+?c"); + assertQueryEquals("a:b\\:?c",a,"a:b:?c"); + assertQueryEquals("a:b\\\\?c",a,"a:b\\?c"); + + assertQueryEquals("a:b\\-c~",a,"a:b-c~"); + assertQueryEquals("a:b\\+c~",a,"a:b+c~"); + assertQueryEquals("a:b\\:c~",a,"a:b:c~"); + assertQueryEquals("a:b\\\\c~",a,"a:b\\c~"); + + assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]"); + assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]"); + assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]"); } public void testSimpleDAO()