LUCENE-995: QP treats * as open end in range

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1026602 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2010-10-23 11:30:51 +00:00
parent 82126d5341
commit c74daf63d1
8 changed files with 95 additions and 55 deletions

View File

@ -275,6 +275,11 @@ New features
QueryParser subclasses that overrode getRangeQuery will need to be changed
to use the new getRangeQuery method. (Andrew Schurman, Mark Miller, yonik)
* LUCENE-995: The QueryParser now interprets * as an open end for range
queries. Literal asterisks may be represented by quoting or escaping
(i.e. \* or "*") Custom QueryParser sublcasses overriding getRangeQuery()
will be passed null for any open endpoint. (Adriano Crestani, yonik)
Optimizations
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.

View File

@ -272,49 +272,58 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive)
throws ParseException {
// get Analyzer from superclass and tokenize the terms
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(part1));
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
TokenStream source = null;
CharTermAttribute termAtt = null;
boolean multipleTokens = false;
// part1
try {
if (source.incrementToken()) {
part1 = termAtt.toString();
if (part1 != null) {
// part1
try {
source = getAnalyzer().tokenStream(field, new StringReader(part1));
termAtt = source.addAttribute(CharTermAttribute.class);
multipleTokens = false;
if (source.incrementToken()) {
part1 = termAtt.toString();
}
multipleTokens = source.incrementToken();
} catch (IOException e) {
// ignore
}
try {
source.close();
} catch (IOException e) {
// ignore
}
if (multipleTokens) {
throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
+ " - tokens were added to part1");
}
multipleTokens = source.incrementToken();
} catch (IOException e) {
// ignore
}
try {
source.close();
} catch (IOException e) {
// ignore
}
if (multipleTokens) {
throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
+ " - tokens were added to part1");
}
// part2
source = getAnalyzer().tokenStream(field, new StringReader(part2));
termAtt = source.addAttribute(CharTermAttribute.class);
if (part2 != null) {
// part2
source = getAnalyzer().tokenStream(field, new StringReader(part2));
termAtt = source.addAttribute(CharTermAttribute.class);
try {
if (source.incrementToken()) {
part2 = termAtt.toString();
try {
if (source.incrementToken()) {
part2 = termAtt.toString();
}
multipleTokens = source.incrementToken();
} catch (IOException e) {
// ignore
}
try {
source.close();
} catch (IOException e) {
// ignore
}
if (multipleTokens) {
throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
+ " - tokens were added to part2");
}
multipleTokens = source.incrementToken();
} catch (IOException e) {
// ignore
}
try {
source.close();
} catch (IOException e) {
// ignore
}
if (multipleTokens) {
throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
+ " - tokens were added to part2");
}
return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
}

View File

@ -448,13 +448,19 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
jj_la1[17] = jj_gen;
;
}
boolean startOpen=false;
boolean endOpen=false;
if (goop1.kind == RANGE_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
} else if ("*".equals(goop1.image)) {
startOpen=true;
}
if (goop2.kind == RANGE_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else if ("*".equals(goop2.image)) {
endOpen=true;
}
q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), startInc, endInc);
q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
break;
case QUOTED:
term = jj_consume_token(QUOTED);

View File

@ -328,13 +328,19 @@ Query Term(String field) : {
( <RANGEIN_END> {endInc=true;} | <RANGEEX_END>))
[ <CARAT> boost=<NUMBER> ]
{
boolean startOpen=false;
boolean endOpen=false;
if (goop1.kind == RANGE_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
} else if ("*".equals(goop1.image)) {
startOpen=true;
}
if (goop2.kind == RANGE_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else if ("*".equals(goop2.image)) {
endOpen=true;
}
q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), startInc, endInc);
q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
}
| term=<QUOTED>
[ fuzzySlop=<FUZZY_SLOP> ]

View File

@ -702,7 +702,8 @@ public abstract class QueryParserBase {
/**
* @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow
*
* @exception org.apache.lucene.queryParser.ParseException
*/
protected Query getRangeQuery(String field,
String part1,
@ -711,13 +712,28 @@ public abstract class QueryParserBase {
boolean endInclusive) throws ParseException
{
if (lowercaseExpandedTerms) {
part1 = part1.toLowerCase();
part2 = part2.toLowerCase();
part1 = part1==null ? null : part1.toLowerCase();
part2 = part2==null ? null : part2.toLowerCase();
}
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
df.setLenient(true);
DateTools.Resolution resolution = getDateResolution(field);
try {
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
df.setLenient(true);
Date d1 = df.parse(part1);
if (resolution == null) {
// no default or field specific date resolution has been set,
// use deprecated DateField to maintain compatibility with
// pre-1.9 Lucene versions.
part1 = DateField.dateToString(d1);
} else {
part1 = DateTools.dateToString(d1, resolution);
}
} catch (Exception e) { }
try {
Date d2 = df.parse(part2);
if (endInclusive) {
// The user can only specify the date, not the time, so make sure
@ -731,19 +747,15 @@ public abstract class QueryParserBase {
cal.set(Calendar.MILLISECOND, 999);
d2 = cal.getTime();
}
DateTools.Resolution resolution = getDateResolution(field);
if (resolution == null) {
// no default or field specific date resolution has been set,
// use deprecated DateField to maintain compatibility with
// pre-1.9 Lucene versions.
part1 = DateField.dateToString(d1);
part2 = DateField.dateToString(d2);
} else {
part1 = DateTools.dateToString(d1, resolution);
part2 = DateTools.dateToString(d2, resolution);
}
}
catch (Exception e) { }
} catch (Exception e) { }
return newRangeQuery(field, part1, part2, startInclusive, endInclusive);
}

View File

@ -160,9 +160,9 @@ public class TermRangeQuery extends MultiTermQuery {
buffer.append(":");
}
buffer.append(includeLower ? '[' : '{');
buffer.append(lowerTerm != null ? lowerTerm : "*");
buffer.append(lowerTerm != null ? ("*".equals(lowerTerm) ? "\\*" : lowerTerm) : "*");
buffer.append(" TO ");
buffer.append(upperTerm != null ? upperTerm : "*");
buffer.append(upperTerm != null ? ("*".equals(upperTerm) ? "\\*" : upperTerm) : "*");
buffer.append(includeUpper ? ']' : '}');
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();

View File

@ -558,6 +558,11 @@ public class TestQueryParser extends LuceneTestCase {
assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar");
assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}");
assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})");
assertQueryEquals("[* TO Z]",null,"[* TO z]");
assertQueryEquals("[A TO *]",null,"[a TO *]");
assertQueryEquals("[* TO *]",null,"[* TO *]");
assertQueryEquals("[\\* TO \"*\"]",null,"[\\* TO \\*]");
}
public void testFarsiRangeCollating() throws Exception {

View File

@ -166,10 +166,7 @@ public class SolrQueryParser extends QueryParser {
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
checkNullField(field);
SchemaField sf = schema.getField(field);
return sf.getType().getRangeQuery(parser, sf,
"*".equals(part1) ? null : part1,
"*".equals(part2) ? null : part2,
startInclusive, endInclusive);
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
}
@Override