Support for new range query syntax. The delimiter is " TO ", but is optional

for backward compatibility with previous syntax.  If the range arguments
match the format supported by DateFormat.getDateInstance(DateFormat.SHORT),
then they will be converted into the appropriate date strings a la DateField.

Added Field.Keyword "constructor" for Date-valued arguments.

Optimized DateField.timeToString function.

PR:
Obtained from:
Submitted by:	Brian Goetz
Reviewed by:


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149785 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Brian Goetz 2002-06-25 00:05:31 +00:00
parent b1b3d33cf7
commit 8ab0396cfc
4 changed files with 102 additions and 45 deletions

View File

@ -105,8 +105,13 @@ public class DateField {
if (s.length() > DATE_LEN)
throw new RuntimeException("time too late");
while (s.length() < DATE_LEN)
s = "0" + s; // pad with leading zeros
// Pad with leading zeros
if (s.length() < DATE_LEN) {
StringBuffer sb = new StringBuffer(s);
while (sb.length() < DATE_LEN)
sb.insert(0, ' ');
s = sb.toString();
}
return s;
}

View File

@ -55,6 +55,7 @@ package org.apache.lucene.document;
*/
import java.io.Reader;
import java.util.Date;
/**
A field is a section of a Document. Each field has two parts, a name and a
@ -91,6 +92,13 @@ public final class Field {
return new Field(name, value, true, true, true);
}
/** Constructs a Date-valued Field that is tokenized and indexed,
and is stored in the index, for return with hits. Useful for short text
fields, like "title" or "subject". */
public static final Field Keyword(String name, Date value) {
return new Field(name, DateField.dateToString(value), true, true, true);
}
/** Constructs a String-valued Field that is tokenized and indexed,
but that is not stored in the index. */
public static final Field UnStored(String name, String value) {

View File

@ -65,8 +65,11 @@ package org.apache.lucene.queryParser;
import java.util.Vector;
import java.io.*;
import java.text.*;
import java.util.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.search.*;
/**
@ -218,35 +221,30 @@ public class QueryParser {
private Query getRangeQuery(String field,
Analyzer analyzer,
String queryText,
String part1,
String part2,
boolean inclusive)
{
// Use the analyzer to get all the tokens. There should be 1 or 2.
TokenStream source = analyzer.tokenStream(field,
new StringReader(queryText));
Term[] terms = new Term[2];
org.apache.lucene.analysis.Token t;
boolean isDate = false, isNumber = false;
for (int i = 0; i < 2; i++)
{
try
{
t = source.next();
}
catch (IOException e)
{
t = null;
}
if (t != null)
{
String text = t.termText();
if (!text.equalsIgnoreCase("NULL"))
{
terms[i] = new Term(field, text);
}
}
try {
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
df.setLenient(true);
Date d1 = df.parse(part1);
Date d2 = df.parse(part2);
part1 = DateField.dateToString(d1);
part2 = DateField.dateToString(d2);
isDate = true;
}
return new RangeQuery(terms[0], terms[1], inclusive);
catch (Exception e) { }
if (!isDate) {
// @@@ Add number support
}
return new RangeQuery(new Term(field, part1),
new Term(field, part2),
inclusive);
}
public static void main(String[] args) throws Exception {
@ -282,7 +280,7 @@ PARSER_END(QueryParser)
| <#_WHITESPACE: ( " " | "\t" ) >
}
<DEFAULT> SKIP : {
<DEFAULT, RangeIn, RangeEx> SKIP : {
<<_WHITESPACE>>
}
@ -303,14 +301,28 @@ PARSER_END(QueryParser)
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
| <WILDTERM: <_TERM_START_CHAR>
(<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <RANGEIN: "[" ( ~[ "]" ] )+ "]">
| <RANGEEX: "{" ( ~[ "}" ] )+ "}">
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}
<Boost> TOKEN : {
<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}
<RangeIn> TOKEN : {
<RANGEIN_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT
| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
}
<RangeEx> TOKEN : {
<RANGEEX_TO: "TO">
| <RANGEEX_END: "}"> : DEFAULT
| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
}
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
@ -387,7 +399,7 @@ Query Clause(String field) : {
Query Term(String field) : {
Token term, boost=null, slop=null;
Token term, boost=null, slop=null, goop1, goop2;
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
@ -415,12 +427,29 @@ Query Term(String field) : {
else
q = getFieldQuery(field, analyzer, term.image);
}
| ( term=<RANGEIN> { rangein=true; } | term=<RANGEEX> )
| ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
[ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
<RANGEIN_END> )
[ <CARAT> boost=<NUMBER> ]
{
q = getRangeQuery(field, analyzer,
term.image.substring(1, term.image.length()-1),
rangein);
if (goop1.kind == RANGEIN_QUOTED)
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
if (goop2.kind == RANGEIN_QUOTED)
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
}
| ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
[ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
<RANGEEX_END> )
[ <CARAT> boost=<NUMBER> ]
{
if (goop1.kind == RANGEEX_QUOTED)
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
if (goop2.kind == RANGEEX_QUOTED)
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
}
| term=<QUOTED>
[ slop=<SLOP> ]

View File

@ -55,11 +55,14 @@ package org.apache.lucene.queryParser;
*/
import java.io.*;
import java.text.*;
import java.util.*;
import junit.framework.*;
import org.apache.lucene.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.*;
import org.apache.lucene.document.DateField;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.analysis.Token;
@ -235,16 +238,28 @@ public class TestQueryParser extends TestCase {
}
public void testRange() throws Exception {
assertQueryEquals("[ a z]", null, "[a-z]");
assertTrue(getQuery("[ a z]", null) instanceof RangeQuery);
assertQueryEquals("[ a z ]", null, "[a-z]");
assertQueryEquals("{ a z}", null, "{a-z}");
assertQueryEquals("{ a z }", null, "{a-z}");
assertQueryEquals("{ a z }^2.0", null, "{a-z}^2.0");
assertQueryEquals("[ a z] OR bar", null, "[a-z] bar");
assertQueryEquals("[ a z] AND bar", null, "+[a-z] +bar");
assertQueryEquals("( bar blar { a z}) ", null, "bar blar {a-z}");
assertQueryEquals("gack ( bar blar { a z}) ", null, "gack (bar blar {a-z})");
assertQueryEquals("[ a TO z]", null, "[a-z]");
assertTrue(getQuery("[ a TO z]", null) instanceof RangeQuery);
assertQueryEquals("[ a TO z ]", null, "[a-z]");
assertQueryEquals("{ a TO z}", null, "{a-z}");
assertQueryEquals("{ a TO z }", null, "{a-z}");
assertQueryEquals("{ a TO z }^2.0", null, "{a-z}^2.0");
assertQueryEquals("[ a TO z] OR bar", null, "[a-z] bar");
assertQueryEquals("[ a TO z] AND bar", null, "+[a-z] +bar");
assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a-z}");
assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a-z})");
}
public String getDate(String s) throws Exception {
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
return DateField.dateToString(df.parse(s));
}
public void testDateRange() throws Exception {
assertQueryEquals("[ 1/1/02 TO 1/4/02]", null,
"[" + getDate("1/1/02") + "-" + getDate("1/4/02") + "]");
assertQueryEquals("{ 1/1/02 1/4/02 }", null,
"{" + getDate("1/1/02") + "-" + getDate("1/4/02") + "}");
}
public void testEscaped() throws Exception {