mirror of https://github.com/apache/lucene.git
LUCENE-995: QP treats * as open end in range
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1026602 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
82126d5341
commit
c74daf63d1
|
@ -275,6 +275,11 @@ New features
|
|||
QueryParser subclasses that overrode getRangeQuery will need to be changed
|
||||
to use the new getRangeQuery method. (Andrew Schurman, Mark Miller, yonik)
|
||||
|
||||
* LUCENE-995: The QueryParser now interprets * as an open end for range
|
||||
queries. Literal asterisks may be represented by quoting or escaping
|
||||
(i.e. \* or "*") Custom QueryParser sublcasses overriding getRangeQuery()
|
||||
will be passed null for any open endpoint. (Adriano Crestani, yonik)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
|
||||
|
|
|
@ -272,49 +272,58 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive)
|
||||
throws ParseException {
|
||||
// get Analyzer from superclass and tokenize the terms
|
||||
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(part1));
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
TokenStream source = null;
|
||||
CharTermAttribute termAtt = null;
|
||||
boolean multipleTokens = false;
|
||||
|
||||
// part1
|
||||
try {
|
||||
if (source.incrementToken()) {
|
||||
part1 = termAtt.toString();
|
||||
if (part1 != null) {
|
||||
// part1
|
||||
try {
|
||||
source = getAnalyzer().tokenStream(field, new StringReader(part1));
|
||||
termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
multipleTokens = false;
|
||||
|
||||
|
||||
if (source.incrementToken()) {
|
||||
part1 = termAtt.toString();
|
||||
}
|
||||
multipleTokens = source.incrementToken();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
try {
|
||||
source.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
if (multipleTokens) {
|
||||
throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
|
||||
+ " - tokens were added to part1");
|
||||
}
|
||||
multipleTokens = source.incrementToken();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
try {
|
||||
source.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
if (multipleTokens) {
|
||||
throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
|
||||
+ " - tokens were added to part1");
|
||||
}
|
||||
|
||||
// part2
|
||||
source = getAnalyzer().tokenStream(field, new StringReader(part2));
|
||||
termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
if (part2 != null) {
|
||||
// part2
|
||||
source = getAnalyzer().tokenStream(field, new StringReader(part2));
|
||||
termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
|
||||
try {
|
||||
if (source.incrementToken()) {
|
||||
part2 = termAtt.toString();
|
||||
try {
|
||||
if (source.incrementToken()) {
|
||||
part2 = termAtt.toString();
|
||||
}
|
||||
multipleTokens = source.incrementToken();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
try {
|
||||
source.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
if (multipleTokens) {
|
||||
throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
|
||||
+ " - tokens were added to part2");
|
||||
}
|
||||
multipleTokens = source.incrementToken();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
try {
|
||||
source.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
if (multipleTokens) {
|
||||
throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
|
||||
+ " - tokens were added to part2");
|
||||
}
|
||||
return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
|
||||
}
|
||||
|
|
|
@ -448,13 +448,19 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
jj_la1[17] = jj_gen;
|
||||
;
|
||||
}
|
||||
boolean startOpen=false;
|
||||
boolean endOpen=false;
|
||||
if (goop1.kind == RANGE_QUOTED) {
|
||||
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
|
||||
} else if ("*".equals(goop1.image)) {
|
||||
startOpen=true;
|
||||
}
|
||||
if (goop2.kind == RANGE_QUOTED) {
|
||||
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
|
||||
} else if ("*".equals(goop2.image)) {
|
||||
endOpen=true;
|
||||
}
|
||||
q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), startInc, endInc);
|
||||
q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
|
||||
break;
|
||||
case QUOTED:
|
||||
term = jj_consume_token(QUOTED);
|
||||
|
|
|
@ -328,13 +328,19 @@ Query Term(String field) : {
|
|||
( <RANGEIN_END> {endInc=true;} | <RANGEEX_END>))
|
||||
[ <CARAT> boost=<NUMBER> ]
|
||||
{
|
||||
boolean startOpen=false;
|
||||
boolean endOpen=false;
|
||||
if (goop1.kind == RANGE_QUOTED) {
|
||||
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
|
||||
} else if ("*".equals(goop1.image)) {
|
||||
startOpen=true;
|
||||
}
|
||||
if (goop2.kind == RANGE_QUOTED) {
|
||||
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
|
||||
} else if ("*".equals(goop2.image)) {
|
||||
endOpen=true;
|
||||
}
|
||||
q = getRangeQuery(field, discardEscapeChar(goop1.image), discardEscapeChar(goop2.image), startInc, endInc);
|
||||
q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
|
||||
}
|
||||
| term=<QUOTED>
|
||||
[ fuzzySlop=<FUZZY_SLOP> ]
|
||||
|
|
|
@ -702,7 +702,8 @@ public abstract class QueryParserBase {
|
|||
|
||||
|
||||
/**
|
||||
* @exception org.apache.lucene.queryParser.ParseException throw in overridden method to disallow
|
||||
*
|
||||
* @exception org.apache.lucene.queryParser.ParseException
|
||||
*/
|
||||
protected Query getRangeQuery(String field,
|
||||
String part1,
|
||||
|
@ -711,13 +712,28 @@ public abstract class QueryParserBase {
|
|||
boolean endInclusive) throws ParseException
|
||||
{
|
||||
if (lowercaseExpandedTerms) {
|
||||
part1 = part1.toLowerCase();
|
||||
part2 = part2.toLowerCase();
|
||||
part1 = part1==null ? null : part1.toLowerCase();
|
||||
part2 = part2==null ? null : part2.toLowerCase();
|
||||
}
|
||||
|
||||
|
||||
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
|
||||
df.setLenient(true);
|
||||
DateTools.Resolution resolution = getDateResolution(field);
|
||||
|
||||
try {
|
||||
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
|
||||
df.setLenient(true);
|
||||
Date d1 = df.parse(part1);
|
||||
if (resolution == null) {
|
||||
// no default or field specific date resolution has been set,
|
||||
// use deprecated DateField to maintain compatibility with
|
||||
// pre-1.9 Lucene versions.
|
||||
part1 = DateField.dateToString(d1);
|
||||
} else {
|
||||
part1 = DateTools.dateToString(d1, resolution);
|
||||
}
|
||||
} catch (Exception e) { }
|
||||
|
||||
try {
|
||||
Date d2 = df.parse(part2);
|
||||
if (endInclusive) {
|
||||
// The user can only specify the date, not the time, so make sure
|
||||
|
@ -731,19 +747,15 @@ public abstract class QueryParserBase {
|
|||
cal.set(Calendar.MILLISECOND, 999);
|
||||
d2 = cal.getTime();
|
||||
}
|
||||
DateTools.Resolution resolution = getDateResolution(field);
|
||||
if (resolution == null) {
|
||||
// no default or field specific date resolution has been set,
|
||||
// use deprecated DateField to maintain compatibility with
|
||||
// pre-1.9 Lucene versions.
|
||||
part1 = DateField.dateToString(d1);
|
||||
part2 = DateField.dateToString(d2);
|
||||
} else {
|
||||
part1 = DateTools.dateToString(d1, resolution);
|
||||
part2 = DateTools.dateToString(d2, resolution);
|
||||
}
|
||||
}
|
||||
catch (Exception e) { }
|
||||
} catch (Exception e) { }
|
||||
|
||||
return newRangeQuery(field, part1, part2, startInclusive, endInclusive);
|
||||
}
|
||||
|
|
|
@ -160,9 +160,9 @@ public class TermRangeQuery extends MultiTermQuery {
|
|||
buffer.append(":");
|
||||
}
|
||||
buffer.append(includeLower ? '[' : '{');
|
||||
buffer.append(lowerTerm != null ? lowerTerm : "*");
|
||||
buffer.append(lowerTerm != null ? ("*".equals(lowerTerm) ? "\\*" : lowerTerm) : "*");
|
||||
buffer.append(" TO ");
|
||||
buffer.append(upperTerm != null ? upperTerm : "*");
|
||||
buffer.append(upperTerm != null ? ("*".equals(upperTerm) ? "\\*" : upperTerm) : "*");
|
||||
buffer.append(includeUpper ? ']' : '}');
|
||||
buffer.append(ToStringUtils.boost(getBoost()));
|
||||
return buffer.toString();
|
||||
|
|
|
@ -558,6 +558,11 @@ public class TestQueryParser extends LuceneTestCase {
|
|||
assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar");
|
||||
assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}");
|
||||
assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})");
|
||||
|
||||
assertQueryEquals("[* TO Z]",null,"[* TO z]");
|
||||
assertQueryEquals("[A TO *]",null,"[a TO *]");
|
||||
assertQueryEquals("[* TO *]",null,"[* TO *]");
|
||||
assertQueryEquals("[\\* TO \"*\"]",null,"[\\* TO \\*]");
|
||||
}
|
||||
|
||||
public void testFarsiRangeCollating() throws Exception {
|
||||
|
|
|
@ -166,10 +166,7 @@ public class SolrQueryParser extends QueryParser {
|
|||
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
|
||||
checkNullField(field);
|
||||
SchemaField sf = schema.getField(field);
|
||||
return sf.getType().getRangeQuery(parser, sf,
|
||||
"*".equals(part1) ? null : part1,
|
||||
"*".equals(part2) ? null : part2,
|
||||
startInclusive, endInclusive);
|
||||
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue