Support for new range query syntax. The delimiter is " TO ", but is optional

for backward compatibility with previous syntax. If the range arguments match the format supported by DateFormat.getDateInstance(DateFormat.SHORT), then they will be converted into the appropriate date strings a la DateField. Added Field.Keyword "constructor" for Date-valued arguments. Optimized DateField.timeToString function. PR: Obtained from: Submitted by: Brian Goetz Reviewed by: git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149785 13f79535-47bb-0310-9956-ffa450edef68
2002-06-25 00:05:31 +00:00 · 2002-06-25 00:05:31 +00:00 · 8ab0396cfc
parent b1b3d33cf7
commit 8ab0396cfc
4 changed files with 102 additions and 45 deletions
--- a/src/java/org/apache/lucene/document/DateField.java
+++ b/src/java/org/apache/lucene/document/DateField.java
@ -105,8 +105,13 @@ public class DateField {
    if (s.length() > DATE_LEN)
      throw new RuntimeException("time too late");

-    while (s.length() < DATE_LEN)
-      s = "0" + s;				  // pad with leading zeros
+    // Pad with leading zeros
+    if (s.length() < DATE_LEN) {
+      StringBuffer sb = new StringBuffer(s);
+      while (sb.length() < DATE_LEN)
+        sb.insert(0, ' ');
+      s = sb.toString();
+    }

    return s;
  }
--- a/src/java/org/apache/lucene/document/Field.java
+++ b/src/java/org/apache/lucene/document/Field.java
@ -55,6 +55,7 @@ package org.apache.lucene.document;
 */

 import java.io.Reader;
+import java.util.Date;

 /**
  A field is a section of a Document.  Each field has two parts, a name and a
@ -91,6 +92,13 @@ public final class Field {
    return new Field(name, value, true, true, true);
  }

+  /** Constructs a Date-valued Field that is tokenized and indexed,
+    and is stored in the index, for return with hits.  Useful for short text
+    fields, like "title" or "subject". */
+  public static final Field Keyword(String name, Date value) {
+    return new Field(name, DateField.dateToString(value), true, true, true);
+  }
+
  /** Constructs a String-valued Field that is tokenized and indexed,
    but that is not stored in the index. */
  public static final Field UnStored(String name, String value) {
--- a/src/java/org/apache/lucene/queryParser/QueryParser.jj
+++ b/src/java/org/apache/lucene/queryParser/QueryParser.jj
@ -65,8 +65,11 @@ package org.apache.lucene.queryParser;

 import java.util.Vector;
 import java.io.*;
+import java.text.*;
+import java.util.*;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.analysis.*;
+import org.apache.lucene.document.*;
 import org.apache.lucene.search.*;

 /**
@ -218,35 +221,30 @@ public class QueryParser {

  private Query getRangeQuery(String field, 
                              Analyzer analyzer, 
-                              String queryText, 
+                              String part1, 
+                              String part2,
                              boolean inclusive) 
  {
-    // Use the analyzer to get all the tokens.  There should be 1 or 2.
-    TokenStream source = analyzer.tokenStream(field, 
-                                              new StringReader(queryText));
-    Term[] terms = new Term[2];
-    org.apache.lucene.analysis.Token t;
+    boolean isDate = false, isNumber = false;

-    for (int i = 0; i < 2; i++)
-    {
-      try 
-      {
-        t = source.next();
-      } 
-      catch (IOException e) 
-      {
-        t = null;
-      }
-      if (t != null)
-      {
-        String text = t.termText();
-        if (!text.equalsIgnoreCase("NULL"))
-        {
-          terms[i] = new Term(field, text);
-        }
-      }
+    try {
+      DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
+      df.setLenient(true);
+      Date d1 = df.parse(part1);
+      Date d2 = df.parse(part2);
+      part1 = DateField.dateToString(d1);
+      part2 = DateField.dateToString(d2);
+      isDate = true;
    }
-    return new RangeQuery(terms[0], terms[1], inclusive);
+    catch (Exception e) { }
+
+    if (!isDate) {
+      // @@@ Add number support
+    }
+
+    return new RangeQuery(new Term(field, part1), 
+                          new Term(field, part2), 
+                          inclusive);
  }

  public static void main(String[] args) throws Exception {
@ -282,7 +280,7 @@ PARSER_END(QueryParser)
 | <#_WHITESPACE: ( " " | "\t" ) >
 }

-<DEFAULT> SKIP : {
+<DEFAULT, RangeIn, RangeEx> SKIP : {
  <<_WHITESPACE>>
 }

@ -303,14 +301,28 @@ PARSER_END(QueryParser)
 | <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
 | <WILDTERM:  <_TERM_START_CHAR> 
              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
-| <RANGEIN:   "[" ( ~[ "]" ] )+ "]">
-| <RANGEEX:   "{" ( ~[ "}" ] )+ "}">
+| <RANGEIN_START: "[" > : RangeIn
+| <RANGEEX_START: "{" > : RangeEx
 }

 <Boost> TOKEN : {
 <NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
 }

+<RangeIn> TOKEN : {
+<RANGEIN_TO: "TO">
+| <RANGEIN_END: "]"> : DEFAULT
+| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
+| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
+}
+
+<RangeEx> TOKEN : {
+<RANGEEX_TO: "TO">
+| <RANGEEX_END: "}"> : DEFAULT
+| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
+| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
+}
+
 // *   Query  ::= ( Clause )*
 // *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

@ -387,7 +399,7 @@ Query Clause(String field) : {
    

 Query Term(String field) : { 
-  Token term, boost=null, slop=null;
+  Token term, boost=null, slop=null, goop1, goop2;
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;
@ -415,12 +427,29 @@ Query Term(String field) : {
       else
         q = getFieldQuery(field, analyzer, term.image); 
     }
-     | ( term=<RANGEIN> { rangein=true; } | term=<RANGEEX> )
+     | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
+         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> ) 
+         <RANGEIN_END> )
       [ <CARAT> boost=<NUMBER> ]
        {
-          q = getRangeQuery(field, analyzer, 
-                            term.image.substring(1, term.image.length()-1), 
-                            rangein);
+          if (goop1.kind == RANGEIN_QUOTED)
+            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+          if (goop2.kind == RANGEIN_QUOTED)
+            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+
+          q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
+        }
+     | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
+         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> ) 
+         <RANGEEX_END> )
+       [ <CARAT> boost=<NUMBER> ]
+        {
+          if (goop1.kind == RANGEEX_QUOTED)
+            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+          if (goop2.kind == RANGEEX_QUOTED)
+            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+
+          q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
        }
     | term=<QUOTED> 
       [ slop=<SLOP> ]
--- a/src/test/org/apache/lucene/queryParser/TestQueryParser.java
+++ b/src/test/org/apache/lucene/queryParser/TestQueryParser.java
@ -55,11 +55,14 @@ package org.apache.lucene.queryParser;
 */

 import java.io.*;
+import java.text.*;
+import java.util.*;
 import junit.framework.*;

 import org.apache.lucene.*;
 import org.apache.lucene.queryParser.*;
 import org.apache.lucene.search.*;
+import org.apache.lucene.document.DateField;
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.standard.*;
 import org.apache.lucene.analysis.Token;
@ -235,16 +238,28 @@ public class TestQueryParser extends TestCase {
  }

  public void testRange() throws Exception {
-    assertQueryEquals("[ a z]", null, "[a-z]");
-    assertTrue(getQuery("[ a z]", null) instanceof RangeQuery);
-    assertQueryEquals("[ a z ]", null, "[a-z]");
-    assertQueryEquals("{ a z}", null, "{a-z}");
-    assertQueryEquals("{ a z }", null, "{a-z}");
-    assertQueryEquals("{ a z }^2.0", null, "{a-z}^2.0");
-    assertQueryEquals("[ a z] OR bar", null, "[a-z] bar");
-    assertQueryEquals("[ a z] AND bar", null, "+[a-z] +bar");
-    assertQueryEquals("( bar blar { a z}) ", null, "bar blar {a-z}");
-    assertQueryEquals("gack ( bar blar { a z}) ", null, "gack (bar blar {a-z})");
+    assertQueryEquals("[ a TO z]", null, "[a-z]");
+    assertTrue(getQuery("[ a TO z]", null) instanceof RangeQuery);
+    assertQueryEquals("[ a TO z ]", null, "[a-z]");
+    assertQueryEquals("{ a TO z}", null, "{a-z}");
+    assertQueryEquals("{ a TO z }", null, "{a-z}");
+    assertQueryEquals("{ a TO z }^2.0", null, "{a-z}^2.0");
+    assertQueryEquals("[ a TO z] OR bar", null, "[a-z] bar");
+    assertQueryEquals("[ a TO z] AND bar", null, "+[a-z] +bar");
+    assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a-z}");
+    assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a-z})");
+  }
+
+  public String getDate(String s) throws Exception {
+    DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
+    return DateField.dateToString(df.parse(s));
+  }
+
+  public void testDateRange() throws Exception {
+    assertQueryEquals("[ 1/1/02 TO 1/4/02]", null, 
+                      "[" + getDate("1/1/02") + "-" + getDate("1/4/02") + "]");
+    assertQueryEquals("{  1/1/02    1/4/02   }", null, 
+                      "{" + getDate("1/1/02") + "-" + getDate("1/4/02") + "}");
  }

  public void testEscaped() throws Exception {