diff --git a/CHANGES.txt b/CHANGES.txt index 77f7af671fc..4b4dc5ffd2f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -113,6 +113,10 @@ New Features 20. SOLR-102: Added RegexFragmenter, which splits text for highlighting based on a given pattern. (klaas) +21. SOLR-258: Date Faceting added to SimpleFacets. Facet counts + computed for ranges of size facet.date.gap (a DateMath expression) + between facet.date.start and facet.date.end. (hossman) + Changes in runtime behavior Optimizations diff --git a/src/java/org/apache/solr/common/params/SolrParams.java b/src/java/org/apache/solr/common/params/SolrParams.java index d997afc362a..8c101b19725 100644 --- a/src/java/org/apache/solr/common/params/SolrParams.java +++ b/src/java/org/apache/solr/common/params/SolrParams.java @@ -121,8 +121,74 @@ public abstract class SolrParams { * only use the filterCache for terms with a df >= to this parameter. */ public static final String FACET_ENUM_CACHE_MINDF = "facet.enum.cache.minDf"; + /** + * Any field whose terms the user wants to enumerate over for + * Facet Contraint Counts (multi-value) + */ + public static final String FACET_DATE = "facet.date"; + /** + * Date string indicating the starting point for a date facet range. + * Can be overriden on a per field basis. + */ + public static final String FACET_DATE_START = "facet.date.start"; + /** + * Date string indicating the endinging point for a date facet range. + * Can be overriden on a per field basis. + */ + public static final String FACET_DATE_END = "facet.date.end"; + /** + * Date Math string indicating the interval of sub-ranges for a date + * facet range. + * Can be overriden on a per field basis. + */ + public static final String FACET_DATE_GAP = "facet.date.gap"; + /** + * Boolean indicating how counts should be computed if the range + * between 'start' and 'end' is not evenly divisible by 'gap'. If + * this value is true, then all counts of ranges involving the 'end' + * point will use the exact endpoint specified -- this includes the + * 'between' and 'after' counts as well as the last range computed + * using the 'gap'. If the value is false, then 'gap' is used to + * compute the effective endpoint closest to the 'end' param which + * results in the range between 'start' and 'end' being evenly + * divisible by 'gap'. + * The default is false. + * Can be overriden on a per field basis. + */ + public static final String FACET_DATE_HARD_END = "facet.date.hardend"; + /** + * String indicating what "other" ranges should be computed for a + * date facet range (multi-value). + * Can be overriden on a per field basis. + * @see FacetDateOther + */ + public static final String FACET_DATE_OTHER = "facet.date.other"; - + /** + * An enumeration of the legal values for FACET_DATE_OTHER... + * + * @see #FACET_DATE_OTHER + */ + public enum FacetDateOther { + BEFORE, AFTER, BETWEEN, ALL, NONE; + public String toString() { return super.toString().toLowerCase(); } + public static FacetDateOther get(String label) { + try { + return valueOf(label.toUpperCase()); + } catch (IllegalArgumentException e) { + throw new SolrException + (SolrException.ErrorCode.BAD_REQUEST, + label+" is not a valid type of 'other' date facet information",e); + } + } + } + /** If the content stream should come from a URL (using URLConnection) */ public static final String STREAM_URL = "stream.url"; diff --git a/src/java/org/apache/solr/request/SimpleFacets.java b/src/java/org/apache/solr/request/SimpleFacets.java index c1b18386717..767e9dbf584 100644 --- a/src/java/org/apache/solr/request/SimpleFacets.java +++ b/src/java/org/apache/solr/request/SimpleFacets.java @@ -25,6 +25,7 @@ import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.*; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.params.SolrParams.FacetDateOther; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.core.SolrCore; @@ -33,12 +34,18 @@ import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.BoolField; +import org.apache.solr.schema.DateField; import org.apache.solr.search.*; import org.apache.solr.util.BoundedTreeSet; +import org.apache.solr.util.DateMathParser; import java.io.IOException; import java.util.Arrays; import java.util.Comparator; +import java.util.Date; +import java.util.Locale; +import java.util.Set; +import java.util.EnumSet; /** * A class that generates simple Facet information for a request. @@ -70,6 +77,7 @@ public class SimpleFacets { * * @see #getFacetQueryCounts * @see #getFacetFieldCounts + * @see #getFacetDateCounts * @see SolrParams#FACET * @return a NamedList of Facet Count info or null */ @@ -83,9 +91,9 @@ public class SimpleFacets { try { res.add("facet_queries", getFacetQueryCounts()); - res.add("facet_fields", getFacetFieldCounts()); - + res.add("facet_dates", getFacetDateCounts()); + } catch (Exception e) { SolrException.logOnce(SolrCore.log, "Exception during facet counts", e); res.add("exception", SolrException.toStr(e)); @@ -402,6 +410,148 @@ public class SimpleFacets { return res; } + /** + * Returns a list of value constraints and the associated facet counts + * for each facet date field, range, and interval specified in the + * SolrParams + * + * @see SolrParams#FACET_DATE + */ + public NamedList getFacetDateCounts() + throws IOException { + + final SolrParams required = new RequiredSolrParams(params); + final NamedList resOuter = new SimpleOrderedMap(); + final String[] fields = params.getParams(SolrParams.FACET_DATE); + final Date NOW = new Date(); + + if (null == fields || 0 == fields.length) return resOuter; + + final IndexSchema schema = searcher.getSchema(); + for (String f : fields) { + final NamedList resInner = new SimpleOrderedMap(); + resOuter.add(f, resInner); + final FieldType trash = schema.getFieldType(f); + if (! (trash instanceof DateField)) { + throw new SolrException + (SolrException.ErrorCode.BAD_REQUEST, + "Can not date facet on a field which is not a DateField: " + f); + } + final DateField ft = (DateField) trash; + final String startS + = required.getFieldParam(f,SolrParams.FACET_DATE_START); + final Date start; + try { + start = ft.parseMath(NOW, startS); + } catch (SolrException e) { + throw new SolrException + (SolrException.ErrorCode.BAD_REQUEST, + "date facet 'start' is not a valid Date string: " + startS, e); + } + final String endS + = required.getFieldParam(f,SolrParams.FACET_DATE_END); + Date end; // not final, hardend may change this + try { + end = ft.parseMath(NOW, endS); + } catch (SolrException e) { + throw new SolrException + (SolrException.ErrorCode.BAD_REQUEST, + "date facet 'end' is not a valid Date string: " + endS, e); + } + + if (end.before(start)) { + throw new SolrException + (SolrException.ErrorCode.BAD_REQUEST, + "date facet 'end' comes before 'start': "+endS+" < "+startS); + } + + final String gap = required.getFieldParam(f,SolrParams.FACET_DATE_GAP); + final DateMathParser dmp = new DateMathParser(ft.UTC, Locale.US); + dmp.setNow(NOW); + + try { + + Date low = start; + while (low.before(end)) { + dmp.setNow(low); + final String lowI = ft.toInternal(low); + final String label = ft.indexedToReadable(lowI); + Date high = dmp.parseMath(gap); + if (end.before(high)) { + if (params.getFieldBool(f,SolrParams.FACET_DATE_HARD_END,false)) { + high = end; + } else { + end = high; + } + } + if (high.before(low)) { + throw new SolrException + (SolrException.ErrorCode.BAD_REQUEST, + "date facet infinite loop (is gap negative?)"); + } + final String highI = ft.toInternal(high); + resInner.add(label, rangeCount(f,lowI,highI,true,true)); + low = high; + } + } catch (java.text.ParseException e) { + throw new SolrException + (SolrException.ErrorCode.BAD_REQUEST, + "date facet 'gap' is not a valid Date Math string: " + gap, e); + } + + // explicitly return the gap and end so all the counts are meaningful + resInner.add("gap", gap); + resInner.add("end", end); + + final String[] othersP = + params.getFieldParams(f,SolrParams.FACET_DATE_OTHER); + if (null != othersP && 0 < othersP.length ) { + Set others = EnumSet.noneOf(FacetDateOther.class); + + for (final String o : othersP) { + others.add(FacetDateOther.get(o)); + } + + // no matter what other values are listed, we don't do + // anything if "none" is specified. + if (! others.contains(FacetDateOther.NONE) ) { + final String startI = ft.toInternal(start); + final String endI = ft.toInternal(end); + + boolean all = others.contains(FacetDateOther.ALL); + + if (all || others.contains(FacetDateOther.BEFORE)) { + resInner.add(FacetDateOther.BEFORE.toString(), + rangeCount(f,null,startI,false,false)); + } + if (all || others.contains(FacetDateOther.AFTER)) { + resInner.add(FacetDateOther.AFTER.toString(), + rangeCount(f,endI,null,false,false)); + } + if (all || others.contains(FacetDateOther.BETWEEN)) { + resInner.add(FacetDateOther.BETWEEN.toString(), + rangeCount(f,startI,endI,true,true)); + } + } + } + } + + return resOuter; + } + + /** + * Macro for getting the numDocs of a ConstantScoreRangeQuery over docs + * @see docs + * @see SolrIndexSearcher#numDocs + * @see ConstantScoreRangeQuery + */ + protected int rangeCount(String field, String low, String high, + boolean iLow, boolean iHigh) throws IOException { + return searcher.numDocs(new ConstantScoreRangeQuery(field,low,high, + iHigh,iLow), + docs); + } + /** * A simple key=>val pair whose natural order is such that * higher vals come before lower vals. diff --git a/src/java/org/apache/solr/schema/DateField.java b/src/java/org/apache/solr/schema/DateField.java index c8ded715dfc..91d38e7d235 100644 --- a/src/java/org/apache/solr/schema/DateField.java +++ b/src/java/org/apache/solr/schema/DateField.java @@ -70,9 +70,10 @@ import java.text.ParseException; * *

* This FieldType also supports incoming "Date Math" strings for computing - * values by adding/rounding internals of time relative "NOW", - * ie: "NOW+1YEAR", "NOW/DAY", etc.. -- see {@link DateMathParser} - * for more examples. + * values by adding/rounding internals of time relative either an explicit + * datetime (in theformat specified above) or the literal string "NOW", + * ie: "NOW+1YEAR", "NOW/DAY", 1995-12-31T23:59:59.999Z+5MINUTES, etc... + * -- see {@link DateMathParser} for more examples. *

* * @version $Id$ @@ -91,20 +92,61 @@ public class DateField extends FieldType { protected void init(IndexSchema schema, Map args) { } + protected static String NOW = "NOW"; + protected static char Z = 'Z'; + public String toInternal(String val) { - int len=val.length(); - if (val.charAt(len-1)=='Z') { + final int len=val.length(); + if (val.charAt(len-1) == Z) { + // check common case first, simple datetime + // NOTE: not parsed to ensure correctness return val.substring(0,len-1); - } else if (val.startsWith("NOW")) { - /* :TODO: let Locale/TimeZone come from init args for rounding only */ - DateMathParser p = new DateMathParser(UTC, Locale.US); - try { - return toInternal(p.parseMath(val.substring(3))); - } catch (ParseException e) { - throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid Date Math String:'" +val+'\'',e); + } + return toInternal(parseMath(null, val)); + } + + /** + * Parses a String which may be a date (in the standard format) + * followed by an optional math expression. + * @param now an optional fixed date to use as "NOW" in the DateMathParser + * @param val the string to parse + */ + public Date parseMath(Date now, String val) { + String math = null; + /* :TODO: let Locale/TimeZone come from init args for rounding only */ + final DateMathParser p = new DateMathParser(UTC, Locale.US); + + if (null != now) p.setNow(now); + + if (val.startsWith(NOW)) { + math = val.substring(NOW.length()); + } else { + final int zz = val.indexOf(Z); + if (0 < zz) { + math = val.substring(zz+1); + try { + p.setNow(toObject(val.substring(0,zz))); + } catch (ParseException e) { + throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, + "Invalid Date in Date Math String:'" + +val+'\'',e); + } + } else { + throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, + "Invalid Date String:'" +val+'\''); } } - throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid Date String:'" +val+'\''); + + if (null == math || math.equals("")) { + return p.getNow(); + } + + try { + return p.parseMath(math); + } catch (ParseException e) { + throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, + "Invalid Date Math String:'" +val+'\'',e); + } } public String toInternal(Date val) { @@ -112,12 +154,15 @@ public class DateField extends FieldType { } public String indexedToReadable(String indexedForm) { - return indexedForm + 'Z'; + return indexedForm + Z; } public String toExternal(Fieldable f) { return indexedToReadable(f.stringValue()); } + public Date toObject(String indexedForm) throws java.text.ParseException { + return getThreadLocalDateFormat().parse(indexedToReadable(indexedForm)); + } @Override public Date toObject(Fieldable f) { diff --git a/src/test/org/apache/solr/BasicFunctionalityTest.java b/src/test/org/apache/solr/BasicFunctionalityTest.java index fac438bc862..49ef0d5a827 100644 --- a/src/test/org/apache/solr/BasicFunctionalityTest.java +++ b/src/test/org/apache/solr/BasicFunctionalityTest.java @@ -612,6 +612,122 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { } + public void testDateFacets() { + final String f = "bday"; + final String pre = "//lst[@name='facet_dates']/lst[@name='"+f+"']"; + + assertU(adoc("id", "1", f, "1976-07-04T12:08:56.235Z")); + assertU(adoc("id", "2", f, "1976-07-05T00:00:00.000Z")); + assertU(adoc("id", "3", f, "1976-07-15T00:07:67.890Z")); + assertU(adoc("id", "4", f, "1976-07-21T00:07:67.890Z")); + assertU(adoc("id", "5", f, "1976-07-13T12:12:25.255Z")); + assertU(adoc("id", "6", f, "1976-07-03T17:01:23.456Z")); + assertU(adoc("id", "7", f, "1976-07-12T12:12:25.255Z")); + assertU(adoc("id", "8", f, "1976-07-15T15:15:15.155Z")); + assertU(adoc("id", "9", f, "1907-07-12T13:13:23.235Z")); + assertU(adoc("id", "10", f, "1976-07-03T11:02:45.678Z")); + assertU(adoc("id", "11", f, "1907-07-12T12:12:25.255Z")); + assertU(adoc("id", "12", f, "2007-07-30T07:07:07.070Z")); + assertU(adoc("id", "13", f, "1976-07-30T22:22:22.222Z")); + assertU(adoc("id", "14", f, "1976-07-05T22:22:22.222Z")); + assertU(commit()); + + assertQ("check counts for month of facet by day", + req( "q", "*:*" + ,"rows", "0" + ,"facet", "true" + ,"facet.date", f + ,"facet.date.start", "1976-07-01T00:00:00.000Z" + ,"facet.date.end", "1976-07-01T00:00:00.000Z+1MONTH" + ,"facet.date.gap", "+1DAY" + ,"facet.date.other", "all" + ) + // 31 days + pre+post+inner = 34 + ,"*[count("+pre+"/int)=34]" + ,pre+"/int[@name='1976-07-01T00:00:00.000Z'][.='0' ]" + ,pre+"/int[@name='1976-07-02T00:00:00.000Z'][.='0' ]" + ,pre+"/int[@name='1976-07-03T00:00:00.000Z'][.='2' ]" + // july4th = 2 because exists doc @ 00:00:00.000 on July5 + // (date faceting is inclusive) + ,pre+"/int[@name='1976-07-04T00:00:00.000Z'][.='2' ]" + ,pre+"/int[@name='1976-07-05T00:00:00.000Z'][.='2' ]" + ,pre+"/int[@name='1976-07-06T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-07T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-08T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-09T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-10T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-11T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-12T00:00:00.000Z'][.='1' ]" + ,pre+"/int[@name='1976-07-13T00:00:00.000Z'][.='1' ]" + ,pre+"/int[@name='1976-07-14T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-15T00:00:00.000Z'][.='2' ]" + ,pre+"/int[@name='1976-07-16T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-17T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-18T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-19T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-21T00:00:00.000Z'][.='1' ]" + ,pre+"/int[@name='1976-07-22T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-23T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-24T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-25T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-26T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-27T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-28T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-29T00:00:00.000Z'][.='0']" + ,pre+"/int[@name='1976-07-30T00:00:00.000Z'][.='1' ]" + ,pre+"/int[@name='1976-07-31T00:00:00.000Z'][.='0']" + + ,pre+"/int[@name='before' ][.='2']" + ,pre+"/int[@name='after' ][.='1']" + ,pre+"/int[@name='between'][.='11']" + + ); + + assertQ("check hardend=false", + req( "q", "*:*" + ,"rows", "0" + ,"facet", "true" + ,"facet.date", f + ,"facet.date.start", "1976-07-01T00:00:00.000Z" + ,"facet.date.end", "1976-07-13T00:00:00.000Z" + ,"facet.date.gap", "+5DAYS" + ,"facet.date.other", "all" + ,"facet.date.hardend","false" + ) + // 3 gaps + pre+post+inner = 6 + ,"*[count("+pre+"/int)=6]" + ,pre+"/int[@name='1976-07-01T00:00:00.000Z'][.='5' ]" + ,pre+"/int[@name='1976-07-06T00:00:00.000Z'][.='0' ]" + ,pre+"/int[@name='1976-07-11T00:00:00.000Z'][.='4' ]" + + ,pre+"/int[@name='before' ][.='2']" + ,pre+"/int[@name='after' ][.='3']" + ,pre+"/int[@name='between'][.='9']" + ); + + assertQ("check hardend=true", + req( "q", "*:*" + ,"rows", "0" + ,"facet", "true" + ,"facet.date", f + ,"facet.date.start", "1976-07-01T00:00:00.000Z" + ,"facet.date.end", "1976-07-13T00:00:00.000Z" + ,"facet.date.gap", "+5DAYS" + ,"facet.date.other", "all" + ,"facet.date.hardend","true" + ) + // 3 gaps + pre+post+inner = 6 + ,"*[count("+pre+"/int)=6]" + ,pre+"/int[@name='1976-07-01T00:00:00.000Z'][.='5' ]" + ,pre+"/int[@name='1976-07-06T00:00:00.000Z'][.='0' ]" + ,pre+"/int[@name='1976-07-11T00:00:00.000Z'][.='1' ]" + + ,pre+"/int[@name='before' ][.='2']" + ,pre+"/int[@name='after' ][.='6']" + ,pre+"/int[@name='between'][.='6']" + ); + + } public void testFacetMultiValued() { doFacets("t_s"); @@ -1128,15 +1244,29 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { // ... // BUT: we can test that crazy combinations of "NOW" all work correctly, // assuming the test doesn't take too long to run... - - assertU(adoc("id", "1", "bday", "1976-07-04T12:08:56.235Z")); + + final String july4 = "1976-07-04T12:08:56.235Z"; + assertU(adoc("id", "1", "bday", july4)); assertU(adoc("id", "2", "bday", "NOW")); assertU(adoc("id", "3", "bday", "NOW/HOUR")); assertU(adoc("id", "4", "bday", "NOW-30MINUTES")); assertU(adoc("id", "5", "bday", "NOW+30MINUTES")); assertU(adoc("id", "6", "bday", "NOW+2YEARS")); assertU(commit()); - + + assertQ("check math on absolute date#1", + req("q", "bday:[* TO "+july4+"/SECOND]"), + "*[count(//doc)=0]"); + assertQ("check math on absolute date#2", + req("q", "bday:[* TO "+july4+"/SECOND+1SECOND]"), + "*[count(//doc)=1]"); + assertQ("check math on absolute date#3", + req("q", "bday:["+july4+"/SECOND TO "+july4+"/SECOND+1SECOND]"), + "*[count(//doc)=1]"); + assertQ("check math on absolute date#4", + req("q", "bday:["+july4+"/MINUTE+1MINUTE TO *]"), + "*[count(//doc)=5]"); + assertQ("check count for before now", req("q", "bday:[* TO NOW]"), "*[count(//doc)=4]");