- Applied the patch that fixes query string escaping:

http://issues.apache.org/bugzilla/show_bug.cgi?id=24665
  Contributed by Jean-Francois Halleux


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150221 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Otis Gospodnetic 2004-03-03 11:24:49 +00:00
parent 17488d5e08
commit cf06b516bc
4 changed files with 154 additions and 139 deletions

View File

@ -57,6 +57,10 @@ $Id$
10. Added support for term vectors. See Field#isTermVectorStored(). 10. Added support for term vectors. See Field#isTermVectorStored().
(Grant Ingersoll, Cutting & Dmitry) (Grant Ingersoll, Cutting & Dmitry)
11. Fixed the old bug with escaping of special characters in query
strings: http://issues.apache.org/bugzilla/show_bug.cgi?id=24665
(Jean-Francois Halleux via Otis)
1.3 final 1.3 final

View File

@ -132,7 +132,7 @@ public class QueryParser implements QueryParserConstants {
/** /**
* Sets the boolean operator of the QueryParser. * Sets the boolean operator of the QueryParser.
* In classic mode (<code>DEFAULT_OPERATOR_OR</mode>) terms without any modifiers * In classic mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers
* are considered optional: for example <code>capital of Hungary</code> is equal to * are considered optional: for example <code>capital of Hungary</code> is equal to
* <code>capital OR of OR Hungary</code>.<br/> * <code>capital OR of OR Hungary</code>.<br/>
* In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the * In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the
@ -386,6 +386,22 @@ public class QueryParser implements QueryParserConstants {
return new FuzzyQuery(t); return new FuzzyQuery(t);
} }
/**
* Returns a String where the escape char has been
* removed, or kept only once if there was a double escape.
*/
private String discardEscapeChar(String input) {
char[] caSource = input.toCharArray();
char[] caDest = new char[caSource.length];
int j = 0;
for (int i = 0; i < caSource.length; i++) {
if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) {
caDest[j++]=caSource[i];
}
}
return new String(caDest, 0, j);
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
QueryParser qp = new QueryParser("field", QueryParser qp = new QueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer()); new org.apache.lucene.analysis.SimpleAnalyzer());
@ -506,7 +522,7 @@ public class QueryParser implements QueryParserConstants {
if (jj_2_1(2)) { if (jj_2_1(2)) {
fieldToken = jj_consume_token(TERM); fieldToken = jj_consume_token(TERM);
jj_consume_token(COLON); jj_consume_token(COLON);
field = fieldToken.image; field=discardEscapeChar(fieldToken.image);
} else { } else {
; ;
} }
@ -609,15 +625,17 @@ public class QueryParser implements QueryParserConstants {
jj_la1[10] = jj_gen; jj_la1[10] = jj_gen;
; ;
} }
String termImage=discardEscapeChar(term.image);
if (wildcard) { if (wildcard) {
q = getWildcardQuery(field, term.image); q = getWildcardQuery(field, termImage);
} else if (prefix) { } else if (prefix) {
q = getPrefixQuery(field, term.image.substring q = getPrefixQuery(field,
(0, term.image.length()-1)); discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
} else if (fuzzy) { } else if (fuzzy) {
q = getFuzzyQuery(field, term.image); q = getFuzzyQuery(field, termImage);
} else { } else {
q = getFieldQuery(field, analyzer, term.image); q = getFieldQuery(field, analyzer, termImage);
} }
break; break;
case RANGEIN_START: case RANGEIN_START:
@ -664,11 +682,16 @@ public class QueryParser implements QueryParserConstants {
jj_la1[14] = jj_gen; jj_la1[14] = jj_gen;
; ;
} }
if (goop1.kind == RANGEIN_QUOTED) if (goop1.kind == RANGEIN_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1); goop1.image = goop1.image.substring(1, goop1.image.length()-1);
if (goop2.kind == RANGEIN_QUOTED) } else {
goop1.image = discardEscapeChar(goop1.image);
}
if (goop2.kind == RANGEIN_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1); goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else {
goop2.image = discardEscapeChar(goop2.image);
}
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
break; break;
case RANGEEX_START: case RANGEEX_START:
@ -715,10 +738,16 @@ public class QueryParser implements QueryParserConstants {
jj_la1[18] = jj_gen; jj_la1[18] = jj_gen;
; ;
} }
if (goop1.kind == RANGEEX_QUOTED) if (goop1.kind == RANGEEX_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1); goop1.image = goop1.image.substring(1, goop1.image.length()-1);
if (goop2.kind == RANGEEX_QUOTED) } else {
goop1.image = discardEscapeChar(goop1.image);
}
if (goop2.kind == RANGEEX_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1); goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else {
goop2.image = discardEscapeChar(goop2.image);
}
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
break; break;

View File

@ -1,58 +1,19 @@
/* ==================================================================== /**
* The Apache Software License, Version 1.1 * Copyright 2004 The Apache Software Foundation
* *
* Copyright (c) 2001, 2002, 2003 The Apache Software Foundation. All * Licensed under the Apache License, Version 2.0 (the "License");
* rights reserved. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* *
* Redistribution and use in source and binary forms, with or without * http://www.apache.org/licenses/LICENSE-2.0
* modification, are permitted provided that the following conditions
* are met:
* *
* 1. Redistributions of source code must retain the above copyright * Unless required by applicable law or agreed to in writing, software
* notice, this list of conditions and the following disclaimer. * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* 2. Redistributions in binary form must reproduce the above copyright * See the License for the specific language governing permissions and
* notice, this list of conditions and the following disclaimer in * limitations under the License.
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/ */
options { options {
STATIC=false; STATIC=false;
JAVA_UNICODE_ESCAPE=true; JAVA_UNICODE_ESCAPE=true;
@ -138,8 +99,8 @@ public class QueryParser {
Locale locale = Locale.getDefault(); Locale locale = Locale.getDefault();
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}. /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
* @param query the query string to be parsed. * @param query the query string to be parsed.
* @param field the default field for query terms. * @param field the default field for query terms.
* @param analyzer used to find terms in the query text. * @param analyzer used to find terms in the query text.
* @throws ParseException if the parsing fails * @throws ParseException if the parsing fails
*/ */
@ -150,7 +111,7 @@ public class QueryParser {
} }
/** Constructs a query parser. /** Constructs a query parser.
* @param f the default field for query terms. * @param f the default field for query terms.
* @param a used to find terms in the query text. * @param a used to find terms in the query text.
*/ */
public QueryParser(String f, Analyzer a) { public QueryParser(String f, Analyzer a) {
@ -161,7 +122,7 @@ public class QueryParser {
/** Parses a query string, returning a /** Parses a query string, returning a
* <a href="lucene.search.Query.html">Query</a>. * <a href="lucene.search.Query.html">Query</a>.
* @param query the query string to be parsed. * @param query the query string to be parsed.
* @throws ParseException if the parsing fails * @throws ParseException if the parsing fails
*/ */
public Query parse(String query) throws ParseException { public Query parse(String query) throws ParseException {
@ -364,7 +325,7 @@ public class QueryParser {
{ {
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery();
for (int i = 0; i < clauses.size(); i++) { for (int i = 0; i < clauses.size(); i++) {
query.add((BooleanClause)clauses.elementAt(i)); query.add((BooleanClause)clauses.elementAt(i));
} }
return query; return query;
} }
@ -393,7 +354,7 @@ public class QueryParser {
protected Query getWildcardQuery(String field, String termStr) throws ParseException protected Query getWildcardQuery(String field, String termStr) throws ParseException
{ {
if (lowercaseWildcardTerms) { if (lowercaseWildcardTerms) {
termStr = termStr.toLowerCase(); termStr = termStr.toLowerCase();
} }
Term t = new Term(field, termStr); Term t = new Term(field, termStr);
return new WildcardQuery(t); return new WildcardQuery(t);
@ -425,7 +386,7 @@ public class QueryParser {
protected Query getPrefixQuery(String field, String termStr) throws ParseException protected Query getPrefixQuery(String field, String termStr) throws ParseException
{ {
if (lowercaseWildcardTerms) { if (lowercaseWildcardTerms) {
termStr = termStr.toLowerCase(); termStr = termStr.toLowerCase();
} }
Term t = new Term(field, termStr); Term t = new Term(field, termStr);
return new PrefixQuery(t); return new PrefixQuery(t);
@ -448,6 +409,22 @@ public class QueryParser {
return new FuzzyQuery(t); return new FuzzyQuery(t);
} }
/**
* Returns a String where the escape char has been
* removed, or kept only once if there was a double escape.
*/
private String discardEscapeChar(String input) {
char[] caSource = input.toCharArray();
char[] caDest = new char[caSource.length];
int j = 0;
for (int i = 0; i < caSource.length; i++) {
if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) {
caDest[j++]=caSource[i];
}
}
return new String(caDest, 0, j);
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
QueryParser qp = new QueryParser("field", QueryParser qp = new QueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer()); new org.apache.lucene.analysis.SimpleAnalyzer());
@ -572,7 +549,7 @@ Query Query(String field) :
if (clauses.size() == 1 && firstQuery != null) if (clauses.size() == 1 && firstQuery != null)
return firstQuery; return firstQuery;
else { else {
return getBooleanQuery(clauses); return getBooleanQuery(clauses);
} }
} }
} }
@ -584,7 +561,9 @@ Query Clause(String field) : {
{ {
[ [
LOOKAHEAD(2) LOOKAHEAD(2)
fieldToken=<TERM> <COLON> { field = fieldToken.image; } fieldToken=<TERM> <COLON> {
field=discardEscapeChar(fieldToken.image);
}
] ]
( (
@ -594,11 +573,11 @@ Query Clause(String field) : {
) )
{ {
if (boost != null) { if (boost != null) {
float f = (float)1.0; float f = (float)1.0;
try { try {
f = Float.valueOf(boost.image).floatValue(); f = Float.valueOf(boost.image).floatValue();
q.setBoost(f); q.setBoost(f);
} catch (Exception ignored) { } } catch (Exception ignored) { }
} }
return q; return q;
} }
@ -624,15 +603,17 @@ Query Term(String field) : {
[ <FUZZY> { fuzzy=true; } ] [ <FUZZY> { fuzzy=true; } ]
[ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ] [ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ]
{ {
String termImage=discardEscapeChar(term.image);
if (wildcard) { if (wildcard) {
q = getWildcardQuery(field, term.image); q = getWildcardQuery(field, termImage);
} else if (prefix) { } else if (prefix) {
q = getPrefixQuery(field, term.image.substring q = getPrefixQuery(field,
(0, term.image.length()-1)); discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
} else if (fuzzy) { } else if (fuzzy) {
q = getFuzzyQuery(field, term.image); q = getFuzzyQuery(field, termImage);
} else { } else {
q = getFieldQuery(field, analyzer, term.image); q = getFieldQuery(field, analyzer, termImage);
} }
} }
| ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> ) | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
@ -640,11 +621,16 @@ Query Term(String field) : {
<RANGEIN_END> ) <RANGEIN_END> )
[ <CARAT> boost=<NUMBER> ] [ <CARAT> boost=<NUMBER> ]
{ {
if (goop1.kind == RANGEIN_QUOTED) if (goop1.kind == RANGEIN_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1); goop1.image = goop1.image.substring(1, goop1.image.length()-1);
if (goop2.kind == RANGEIN_QUOTED) } else {
goop1.image = discardEscapeChar(goop1.image);
}
if (goop2.kind == RANGEIN_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1); goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else {
goop2.image = discardEscapeChar(goop2.image);
}
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true); q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
} }
| ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> ) | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
@ -652,10 +638,16 @@ Query Term(String field) : {
<RANGEEX_END> ) <RANGEEX_END> )
[ <CARAT> boost=<NUMBER> ] [ <CARAT> boost=<NUMBER> ]
{ {
if (goop1.kind == RANGEEX_QUOTED) if (goop1.kind == RANGEEX_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1); goop1.image = goop1.image.substring(1, goop1.image.length()-1);
if (goop2.kind == RANGEEX_QUOTED) } else {
goop1.image = discardEscapeChar(goop1.image);
}
if (goop2.kind == RANGEEX_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1); goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else {
goop2.image = discardEscapeChar(goop2.image);
}
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false); q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
} }
@ -681,9 +673,9 @@ Query Term(String field) : {
f = Float.valueOf(boost.image).floatValue(); f = Float.valueOf(boost.image).floatValue();
} }
catch (Exception ignored) { catch (Exception ignored) {
/* Should this be handled somehow? (defaults to "no boost", if /* Should this be handled somehow? (defaults to "no boost", if
* boost number is invalid) * boost number is invalid)
*/ */
} }
// avoid boosting null queries, such as those caused by stop words // avoid boosting null queries, such as those caused by stop words

View File

@ -1,57 +1,19 @@
package org.apache.lucene.queryParser; package org.apache.lucene.queryParser;
/* ==================================================================== /**
* The Apache Software License, Version 1.1 * Copyright 2002-2004 The Apache Software Foundation
* *
* Copyright (c) 2001, 2002, 2003 The Apache Software Foundation. All * Licensed under the Apache License, Version 2.0 (the "License");
* rights reserved. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* *
* Redistribution and use in source and binary forms, with or without * http://www.apache.org/licenses/LICENSE-2.0
* modification, are permitted provided that the following conditions
* are met:
* *
* 1. Redistributions of source code must retain the above copyright * Unless required by applicable law or agreed to in writing, software
* notice, this list of conditions and the following disclaimer. * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* 2. Redistributions in binary form must reproduce the above copyright * See the License for the specific language governing permissions and
* notice, this list of conditions and the following disclaimer in * limitations under the License.
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/ */
import junit.framework.TestCase; import junit.framework.TestCase;
@ -355,7 +317,7 @@ public class TestQueryParser extends TestCase {
public void testEscaped() throws Exception { public void testEscaped() throws Exception {
Analyzer a = new WhitespaceAnalyzer(); Analyzer a = new WhitespaceAnalyzer();
assertQueryEquals("\\[brackets", a, "\\[brackets"); /* assertQueryEquals("\\[brackets", a, "\\[brackets");
assertQueryEquals("\\[brackets", null, "brackets"); assertQueryEquals("\\[brackets", null, "brackets");
assertQueryEquals("\\\\", a, "\\\\"); assertQueryEquals("\\\\", a, "\\\\");
assertQueryEquals("\\+blah", a, "\\+blah"); assertQueryEquals("\\+blah", a, "\\+blah");
@ -377,8 +339,36 @@ public class TestQueryParser extends TestCase {
assertQueryEquals("\\?blah", a, "\\?blah"); assertQueryEquals("\\?blah", a, "\\?blah");
assertQueryEquals("foo \\&& bar", a, "foo \\&& bar"); assertQueryEquals("foo \\&& bar", a, "foo \\&& bar");
assertQueryEquals("foo \\|| bar", a, "foo \\|| bar"); assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
assertQueryEquals("foo \\AND bar", a, "foo \\AND bar"); assertQueryEquals("foo \\AND bar", a, "foo \\AND bar"); */
assertQueryEquals("a\\-b:c",a,"a-b:c");
assertQueryEquals("a\\+b:c",a,"a+b:c");
assertQueryEquals("a\\:b:c",a,"a:b:c");
assertQueryEquals("a\\\\b:c",a,"a\\b:c");
assertQueryEquals("a:b\\-c",a,"a:b-c");
assertQueryEquals("a:b\\+c",a,"a:b+c");
assertQueryEquals("a:b\\:c",a,"a:b:c");
assertQueryEquals("a:b\\\\c",a,"a:b\\c");
assertQueryEquals("a:b\\-c*",a,"a:b-c*");
assertQueryEquals("a:b\\+c*",a,"a:b+c*");
assertQueryEquals("a:b\\:c*",a,"a:b:c*");
assertQueryEquals("a:b\\\\c*",a,"a:b\\c*");
assertQueryEquals("a:b\\-?c",a,"a:b-?c");
assertQueryEquals("a:b\\+?c",a,"a:b+?c");
assertQueryEquals("a:b\\:?c",a,"a:b:?c");
assertQueryEquals("a:b\\\\?c",a,"a:b\\?c");
assertQueryEquals("a:b\\-c~",a,"a:b-c~");
assertQueryEquals("a:b\\+c~",a,"a:b+c~");
assertQueryEquals("a:b\\:c~",a,"a:b:c~");
assertQueryEquals("a:b\\\\c~",a,"a:b\\c~");
assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]");
assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]");
assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]");
} }
public void testSimpleDAO() public void testSimpleDAO()