SOLR-258: Date Faceting added to SimpleFacets

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@560686 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2007-07-29 06:28:41 +00:00
parent 4ac947d37b
commit 670b25d967
5 changed files with 415 additions and 20 deletions

View File

@ -113,6 +113,10 @@ New Features
20. SOLR-102: Added RegexFragmenter, which splits text for highlighting 20. SOLR-102: Added RegexFragmenter, which splits text for highlighting
based on a given pattern. (klaas) based on a given pattern. (klaas)
21. SOLR-258: Date Faceting added to SimpleFacets. Facet counts
computed for ranges of size facet.date.gap (a DateMath expression)
between facet.date.start and facet.date.end. (hossman)
Changes in runtime behavior Changes in runtime behavior
Optimizations Optimizations

View File

@ -121,8 +121,74 @@ public abstract class SolrParams {
* only use the filterCache for terms with a df >= to this parameter. * only use the filterCache for terms with a df >= to this parameter.
*/ */
public static final String FACET_ENUM_CACHE_MINDF = "facet.enum.cache.minDf"; public static final String FACET_ENUM_CACHE_MINDF = "facet.enum.cache.minDf";
/**
 * Any date field for which the user wants facet counts computed over
 * ranges of dates (multi-value)
 */
public static final String FACET_DATE = "facet.date";
/**
 * Date string indicating the starting point for a date facet range.
 * Can be overridden on a per field basis.
 */
public static final String FACET_DATE_START = "facet.date.start";
/**
 * Date string indicating the ending point for a date facet range.
 * Can be overridden on a per field basis.
 */
public static final String FACET_DATE_END = "facet.date.end";
/**
 * Date Math string indicating the interval of sub-ranges for a date
 * facet range.
 * Can be overridden on a per field basis.
 */
public static final String FACET_DATE_GAP = "facet.date.gap";
/**
 * Boolean indicating how counts should be computed if the range
 * between 'start' and 'end' is not evenly divisible by 'gap'. If
 * this value is true, then all counts of ranges involving the 'end'
 * point will use the exact endpoint specified -- this includes the
 * 'between' and 'after' counts as well as the last range computed
 * using the 'gap'. If the value is false, then 'gap' is used to
 * compute the effective endpoint closest to the 'end' param which
 * results in the range between 'start' and 'end' being evenly
 * divisible by 'gap'.
 * The default is false.
 * Can be overridden on a per field basis.
 */
public static final String FACET_DATE_HARD_END = "facet.date.hardend";
/**
 * String indicating what "other" ranges should be computed for a
 * date facet range (multi-value).
 * Can be overridden on a per field basis.
 * @see FacetDateOther
 */
public static final String FACET_DATE_OTHER = "facet.date.other";
/**
 * An enumeration of the legal values for FACET_DATE_OTHER...
 * <ul>
 * <li>before = the count of matches before the start date</li>
 * <li>after = the count of matches after the end date</li>
 * <li>between = the count of all matches between start and end</li>
 * <li>all = all of the above (default value)</li>
 * <li>none = no additional info requested</li>
 * </ul>
 * <p>
 * NOTE(review): SimpleFacets.getFacetDateCounts computes no "other"
 * counts at all when the parameter is absent, which does not match
 * "all" being described as the default -- confirm the intended default.
 * </p>
 * @see #FACET_DATE_OTHER
 */
public enum FacetDateOther {
  BEFORE, AFTER, BETWEEN, ALL, NONE;

  /**
   * Returns the lower-case label of this constant, as used in request
   * params and response keys.  A fixed locale is used so the label does
   * not depend on the JVM default locale (ie: Turkish dotless i).
   */
  @Override
  public String toString() {
    return super.toString().toLowerCase(java.util.Locale.ENGLISH);
  }

  /**
   * Looks up the constant matching a (case-insensitive) label.
   *
   * @param label the param value supplied by the client
   * @return the matching constant
   * @throws SolrException (BAD_REQUEST) if the label is null or does
   *         not name one of the constants
   */
  public static FacetDateOther get(String label) {
    if (null == label) {
      // report a missing value the same way as an unknown one,
      // instead of failing with a NullPointerException
      throw new SolrException
        (SolrException.ErrorCode.BAD_REQUEST,
         label+" is not a valid type of 'other' date facet information");
    }
    try {
      // locale-independent upper-casing so "all" always maps to ALL
      return valueOf(label.toUpperCase(java.util.Locale.ENGLISH));
    } catch (IllegalArgumentException e) {
      throw new SolrException
        (SolrException.ErrorCode.BAD_REQUEST,
         label+" is not a valid type of 'other' date facet information",e);
    }
  }
}
/** If the content stream should come from a URL (using URLConnection) */ /** If the content stream should come from a URL (using URLConnection) */
public static final String STREAM_URL = "stream.url"; public static final String STREAM_URL = "stream.url";

View File

@ -25,6 +25,7 @@ import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.*; import org.apache.lucene.search.*;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.SolrParams.FacetDateOther;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
@ -33,12 +34,18 @@ import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.BoolField; import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.DateField;
import org.apache.solr.search.*; import org.apache.solr.search.*;
import org.apache.solr.util.BoundedTreeSet; import org.apache.solr.util.BoundedTreeSet;
import org.apache.solr.util.DateMathParser;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.Comparator; import java.util.Comparator;
import java.util.Date;
import java.util.Locale;
import java.util.Set;
import java.util.EnumSet;
/** /**
* A class that generates simple Facet information for a request. * A class that generates simple Facet information for a request.
@ -70,6 +77,7 @@ public class SimpleFacets {
* *
* @see #getFacetQueryCounts * @see #getFacetQueryCounts
* @see #getFacetFieldCounts * @see #getFacetFieldCounts
* @see #getFacetDateCounts
* @see SolrParams#FACET * @see SolrParams#FACET
* @return a NamedList of Facet Count info or null * @return a NamedList of Facet Count info or null
*/ */
@ -83,9 +91,9 @@ public class SimpleFacets {
try { try {
res.add("facet_queries", getFacetQueryCounts()); res.add("facet_queries", getFacetQueryCounts());
res.add("facet_fields", getFacetFieldCounts()); res.add("facet_fields", getFacetFieldCounts());
res.add("facet_dates", getFacetDateCounts());
} catch (Exception e) { } catch (Exception e) {
SolrException.logOnce(SolrCore.log, "Exception during facet counts", e); SolrException.logOnce(SolrCore.log, "Exception during facet counts", e);
res.add("exception", SolrException.toStr(e)); res.add("exception", SolrException.toStr(e));
@ -402,6 +410,148 @@ public class SimpleFacets {
return res; return res;
} }
/**
 * Returns a list of value constraints and the associated facet counts
 * for each facet date field, range, and interval specified in the
 * SolrParams
 *
 * <p>
 * For every field named by the facet.date param, the range from
 * 'start' to 'end' is walked in steps of 'gap'.  Each step adds one
 * count, keyed by the readable form of the step's lower bound; both
 * bounds of a step are inclusive.  The 'gap' string and the effective
 * 'end' date are added to the result as well, followed by any
 * before/after/between counts requested via facet.date.other.
 * </p>
 *
 * @return a NamedList keyed by field name (empty if no facet.date
 *         params were specified)
 * @throws SolrException (BAD_REQUEST) if a field is not a DateField, if
 *         'start', 'end' or 'gap' can not be parsed, if 'end' comes
 *         before 'start', or if applying 'gap' does not move forward
 *         in time
 * @throws IOException if counting the matching documents fails
 * @see SolrParams#FACET_DATE
 */
public NamedList getFacetDateCounts()
  throws IOException {

  final SolrParams required = new RequiredSolrParams(params);
  final NamedList resOuter = new SimpleOrderedMap();
  final String[] fields = params.getParams(SolrParams.FACET_DATE);
  // one fixed "NOW" so every field and every bound is computed
  // relative to the same instant
  final Date NOW = new Date();

  if (null == fields || 0 == fields.length) return resOuter;

  final IndexSchema schema = searcher.getSchema();
  for (String f : fields) {
    final NamedList resInner = new SimpleOrderedMap();
    resOuter.add(f, resInner);

    final FieldType fieldType = schema.getFieldType(f);
    if (! (fieldType instanceof DateField)) {
      throw new SolrException
        (SolrException.ErrorCode.BAD_REQUEST,
         "Can not date facet on a field which is not a DateField: " + f);
    }
    final DateField ft = (DateField) fieldType;

    final String startS
      = required.getFieldParam(f,SolrParams.FACET_DATE_START);
    final Date start;
    try {
      start = ft.parseMath(NOW, startS);
    } catch (SolrException e) {
      throw new SolrException
        (SolrException.ErrorCode.BAD_REQUEST,
         "date facet 'start' is not a valid Date string: " + startS, e);
    }

    final String endS
      = required.getFieldParam(f,SolrParams.FACET_DATE_END);
    Date end; // not final, hardend may change this
    try {
      end = ft.parseMath(NOW, endS);
    } catch (SolrException e) {
      throw new SolrException
        (SolrException.ErrorCode.BAD_REQUEST,
         "date facet 'end' is not a valid Date string: " + endS, e);
    }

    if (end.before(start)) {
      throw new SolrException
        (SolrException.ErrorCode.BAD_REQUEST,
         "date facet 'end' comes before 'start': "+endS+" < "+startS);
    }

    final String gap = required.getFieldParam(f,SolrParams.FACET_DATE_GAP);
    final DateMathParser dmp = new DateMathParser(ft.UTC, Locale.US);
    dmp.setNow(NOW);

    try {
      Date low = start;
      while (low.before(end)) {
        // the gap is always applied relative to the current lower bound
        dmp.setNow(low);
        final String lowI = ft.toInternal(low);
        final String label = ft.indexedToReadable(lowI);
        Date high = dmp.parseMath(gap);
        if (end.before(high)) {
          // last step overshoots 'end': either clip the step (hardend)
          // or push the effective end out to the step boundary
          if (params.getFieldBool(f,SolrParams.FACET_DATE_HARD_END,false)) {
            high = end;
          } else {
            end = high;
          }
        }
        if (high.before(low)) {
          throw new SolrException
            (SolrException.ErrorCode.BAD_REQUEST,
             "date facet infinite loop (is gap negative?)");
        }
        if (high.equals(low)) {
          // a gap that does not advance the date (ie: "+0DAYS") would
          // otherwise keep this loop running forever
          throw new SolrException
            (SolrException.ErrorCode.BAD_REQUEST,
             "date facet infinite loop: gap is effectively zero");
        }
        final String highI = ft.toInternal(high);
        resInner.add(label, rangeCount(f,lowI,highI,true,true));
        low = high;
      }
    } catch (java.text.ParseException e) {
      throw new SolrException
        (SolrException.ErrorCode.BAD_REQUEST,
         "date facet 'gap' is not a valid Date Math string: " + gap, e);
    }

    // explicitly return the gap and end so all the counts are meaningful
    resInner.add("gap", gap);
    resInner.add("end", end);

    final String[] othersP =
      params.getFieldParams(f,SolrParams.FACET_DATE_OTHER);
    if (null != othersP && 0 < othersP.length ) {
      Set<FacetDateOther> others = EnumSet.noneOf(FacetDateOther.class);

      for (final String o : othersP) {
        others.add(FacetDateOther.get(o));
      }

      // no matter what other values are listed, we don't do
      // anything if "none" is specified.
      if (! others.contains(FacetDateOther.NONE) ) {
        final String startI = ft.toInternal(start);
        // uses the effective end, which may have been moved above
        final String endI = ft.toInternal(end);

        boolean all = others.contains(FacetDateOther.ALL);

        if (all || others.contains(FacetDateOther.BEFORE)) {
          resInner.add(FacetDateOther.BEFORE.toString(),
                       rangeCount(f,null,startI,false,false));
        }
        if (all || others.contains(FacetDateOther.AFTER)) {
          resInner.add(FacetDateOther.AFTER.toString(),
                       rangeCount(f,endI,null,false,false));
        }
        if (all || others.contains(FacetDateOther.BETWEEN)) {
          resInner.add(FacetDateOther.BETWEEN.toString(),
                       rangeCount(f,startI,endI,true,true));
        }
      }
    }
  }

  return resOuter;
}
/**
 * Macro for getting the numDocs of a ConstantScoreRangeQuery over docs
 *
 * @param field the field to run the range query against
 * @param low the lower bound (indexed form), or null for no lower bound
 * @param high the upper bound (indexed form), or null for no upper bound
 * @param iLow true if the lower bound is inclusive
 * @param iHigh true if the upper bound is inclusive
 * @return the number of docs in {@link #docs} that match the range
 * @throws IOException if the searcher fails to compute the count
 * @see #docs
 * @see SolrIndexSearcher#numDocs
 * @see ConstantScoreRangeQuery
 */
protected int rangeCount(String field, String low, String high,
                         boolean iLow, boolean iHigh) throws IOException {
  // ConstantScoreRangeQuery takes includeLower before includeUpper
  return searcher.numDocs(new ConstantScoreRangeQuery(field,low,high,
                                                      iLow,iHigh),
                          docs);
}
/** /**
* A simple key=>val pair whose natural order is such that * A simple key=>val pair whose natural order is such that
* <b>higher</b> vals come before lower vals. * <b>higher</b> vals come before lower vals.

View File

@ -70,9 +70,10 @@ import java.text.ParseException;
* *
* <p> * <p>
* This FieldType also supports incoming "Date Math" strings for computing * This FieldType also supports incoming "Date Math" strings for computing
* values by adding/rounding internals of time relative "NOW", * values by adding/rounding intervals of time relative to either an explicit
* ie: "NOW+1YEAR", "NOW/DAY", etc.. -- see {@link DateMathParser} * datetime (in the format specified above) or the literal string "NOW",
* for more examples. * ie: "NOW+1YEAR", "NOW/DAY", 1995-12-31T23:59:59.999Z+5MINUTES, etc...
* -- see {@link DateMathParser} for more examples.
* </p> * </p>
* *
* @version $Id$ * @version $Id$
@ -91,20 +92,61 @@ public class DateField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) { protected void init(IndexSchema schema, Map<String,String> args) {
} }
/** Literal prefix recognized by parseMath as a date math base. */
protected static final String NOW = "NOW";
/** Character terminating the readable form of a date; stripped from / appended to the indexed form. */
protected static final char Z = 'Z';
public String toInternal(String val) { public String toInternal(String val) {
int len=val.length(); final int len=val.length();
if (val.charAt(len-1)=='Z') { if (val.charAt(len-1) == Z) {
// check common case first, simple datetime
// NOTE: not parsed to ensure correctness
return val.substring(0,len-1); return val.substring(0,len-1);
} else if (val.startsWith("NOW")) { }
/* :TODO: let Locale/TimeZone come from init args for rounding only */ return toInternal(parseMath(null, val));
DateMathParser p = new DateMathParser(UTC, Locale.US); }
try {
return toInternal(p.parseMath(val.substring(3))); /**
} catch (ParseException e) { * Parses a String which may be a date (in the standard format)
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid Date Math String:'" +val+'\'',e); * followed by an optional math expression.
* @param now an optional fixed date to use as "NOW" in the DateMathParser
* @param val the string to parse
*/
// Resolves val to a Date: the base is either the literal "NOW" or an
// explicit date terminated by 'Z'; any text after the base is treated
// as a date math expression and handed to a DateMathParser.
public Date parseMath(Date now, String val) {
String math = null;
/* :TODO: let Locale/TimeZone come from init args for rounding only */
final DateMathParser p = new DateMathParser(UTC, Locale.US);
// use the caller-supplied base date, if there is one
if (null != now) p.setNow(now);
if (val.startsWith(NOW)) {
// everything after the literal "NOW" is the math expression
math = val.substring(NOW.length());
} else {
// otherwise look for an explicit date, which ends at the first 'Z'
final int zz = val.indexOf(Z);
if (0 < zz) {
// text following the 'Z' (possibly empty) is the math expression
math = val.substring(zz+1);
try {
// the explicit date replaces "now" as the base for the math
p.setNow(toObject(val.substring(0,zz)));
} catch (ParseException e) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
"Invalid Date in Date Math String:'"
+val+'\'',e);
}
} else {
// neither "NOW" nor anything before a 'Z': not a usable date string
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
"Invalid Date String:'" +val+'\'');
} }
} }
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid Date String:'" +val+'\'');
// no math expression: the base date itself is the result
if (null == math || math.equals("")) {
return p.getNow();
}
try {
return p.parseMath(math);
} catch (ParseException e) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
"Invalid Date Math String:'" +val+'\'',e);
}
} }
public String toInternal(Date val) { public String toInternal(Date val) {
@ -112,12 +154,15 @@ public class DateField extends FieldType {
} }
public String indexedToReadable(String indexedForm) { public String indexedToReadable(String indexedForm) {
return indexedForm + 'Z'; return indexedForm + Z;
} }
public String toExternal(Fieldable f) { public String toExternal(Fieldable f) {
return indexedToReadable(f.stringValue()); return indexedToReadable(f.stringValue());
} }
/**
 * Converts the indexed form of a date into a Date object, by turning
 * it into its readable form and parsing that with the thread local
 * date format.
 *
 * @param indexedForm the indexed form of the date
 * @return the parsed Date
 * @throws java.text.ParseException if the readable form can not be parsed
 */
public Date toObject(String indexedForm) throws java.text.ParseException {
  final String readable = indexedToReadable(indexedForm);
  return getThreadLocalDateFormat().parse(readable);
}
@Override @Override
public Date toObject(Fieldable f) { public Date toObject(Fieldable f) {

View File

@ -612,6 +612,122 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
} }
/**
 * Indexes a handful of docs with dates in the "bday" field and checks
 * the date facet counts: once by day over a full month (every one of
 * the 31 day buckets is asserted), and twice with a gap that does not
 * evenly divide the range, with hardend=false and with hardend=true.
 */
public void testDateFacets() {
  final String f = "bday";
  final String pre = "//lst[@name='facet_dates']/lst[@name='"+f+"']";

  assertU(adoc("id", "1", f, "1976-07-04T12:08:56.235Z"));
  assertU(adoc("id", "2", f, "1976-07-05T00:00:00.000Z"));
  assertU(adoc("id", "3", f, "1976-07-15T00:07:67.890Z"));
  assertU(adoc("id", "4", f, "1976-07-21T00:07:67.890Z"));
  assertU(adoc("id", "5", f, "1976-07-13T12:12:25.255Z"));
  assertU(adoc("id", "6", f, "1976-07-03T17:01:23.456Z"));
  assertU(adoc("id", "7", f, "1976-07-12T12:12:25.255Z"));
  assertU(adoc("id", "8", f, "1976-07-15T15:15:15.155Z"));
  assertU(adoc("id", "9", f, "1907-07-12T13:13:23.235Z"));
  assertU(adoc("id", "10", f, "1976-07-03T11:02:45.678Z"));
  assertU(adoc("id", "11", f, "1907-07-12T12:12:25.255Z"));
  assertU(adoc("id", "12", f, "2007-07-30T07:07:07.070Z"));
  assertU(adoc("id", "13", f, "1976-07-30T22:22:22.222Z"));
  assertU(adoc("id", "14", f, "1976-07-05T22:22:22.222Z"));
  assertU(commit());

  assertQ("check counts for month of facet by day",
          req( "q", "*:*"
              ,"rows", "0"
              ,"facet", "true"
              ,"facet.date", f
              ,"facet.date.start", "1976-07-01T00:00:00.000Z"
              ,"facet.date.end", "1976-07-01T00:00:00.000Z+1MONTH"
              ,"facet.date.gap", "+1DAY"
              ,"facet.date.other", "all"
              )
          // 31 days + pre+post+inner = 34
          ,"*[count("+pre+"/int)=34]"
          ,pre+"/int[@name='1976-07-01T00:00:00.000Z'][.='0' ]"
          ,pre+"/int[@name='1976-07-02T00:00:00.000Z'][.='0' ]"
          ,pre+"/int[@name='1976-07-03T00:00:00.000Z'][.='2' ]"
          // july4th = 2 because exists doc @ 00:00:00.000 on July5
          // (date faceting is inclusive)
          ,pre+"/int[@name='1976-07-04T00:00:00.000Z'][.='2' ]"
          ,pre+"/int[@name='1976-07-05T00:00:00.000Z'][.='2' ]"
          ,pre+"/int[@name='1976-07-06T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-07T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-08T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-09T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-10T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-11T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-12T00:00:00.000Z'][.='1' ]"
          ,pre+"/int[@name='1976-07-13T00:00:00.000Z'][.='1' ]"
          ,pre+"/int[@name='1976-07-14T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-15T00:00:00.000Z'][.='2' ]"
          ,pre+"/int[@name='1976-07-16T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-17T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-18T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-19T00:00:00.000Z'][.='0']"
          // no doc falls on July 20 (the July 21 doc is after midnight)
          ,pre+"/int[@name='1976-07-20T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-21T00:00:00.000Z'][.='1' ]"
          ,pre+"/int[@name='1976-07-22T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-23T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-24T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-25T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-26T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-27T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-28T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-29T00:00:00.000Z'][.='0']"
          ,pre+"/int[@name='1976-07-30T00:00:00.000Z'][.='1' ]"
          ,pre+"/int[@name='1976-07-31T00:00:00.000Z'][.='0']"

          ,pre+"/int[@name='before' ][.='2']"
          ,pre+"/int[@name='after' ][.='1']"
          ,pre+"/int[@name='between'][.='11']"
          );

  assertQ("check hardend=false",
          req( "q", "*:*"
              ,"rows", "0"
              ,"facet", "true"
              ,"facet.date", f
              ,"facet.date.start", "1976-07-01T00:00:00.000Z"
              ,"facet.date.end", "1976-07-13T00:00:00.000Z"
              ,"facet.date.gap", "+5DAYS"
              ,"facet.date.other", "all"
              ,"facet.date.hardend","false"
              )
          // 3 gaps + pre+post+inner = 6
          ,"*[count("+pre+"/int)=6]"
          ,pre+"/int[@name='1976-07-01T00:00:00.000Z'][.='5' ]"
          ,pre+"/int[@name='1976-07-06T00:00:00.000Z'][.='0' ]"
          ,pre+"/int[@name='1976-07-11T00:00:00.000Z'][.='4' ]"

          ,pre+"/int[@name='before' ][.='2']"
          ,pre+"/int[@name='after' ][.='3']"
          ,pre+"/int[@name='between'][.='9']"
          );

  assertQ("check hardend=true",
          req( "q", "*:*"
              ,"rows", "0"
              ,"facet", "true"
              ,"facet.date", f
              ,"facet.date.start", "1976-07-01T00:00:00.000Z"
              ,"facet.date.end", "1976-07-13T00:00:00.000Z"
              ,"facet.date.gap", "+5DAYS"
              ,"facet.date.other", "all"
              ,"facet.date.hardend","true"
              )
          // 3 gaps + pre+post+inner = 6
          ,"*[count("+pre+"/int)=6]"
          ,pre+"/int[@name='1976-07-01T00:00:00.000Z'][.='5' ]"
          ,pre+"/int[@name='1976-07-06T00:00:00.000Z'][.='0' ]"
          ,pre+"/int[@name='1976-07-11T00:00:00.000Z'][.='1' ]"

          ,pre+"/int[@name='before' ][.='2']"
          ,pre+"/int[@name='after' ][.='6']"
          ,pre+"/int[@name='between'][.='6']"
          );
}
public void testFacetMultiValued() { public void testFacetMultiValued() {
doFacets("t_s"); doFacets("t_s");
@ -1128,15 +1244,29 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
// ... // ...
// BUT: we can test that crazy combinations of "NOW" all work correctly, // BUT: we can test that crazy combinations of "NOW" all work correctly,
// assuming the test doesn't take too long to run... // assuming the test doesn't take too long to run...
assertU(adoc("id", "1", "bday", "1976-07-04T12:08:56.235Z")); final String july4 = "1976-07-04T12:08:56.235Z";
assertU(adoc("id", "1", "bday", july4));
assertU(adoc("id", "2", "bday", "NOW")); assertU(adoc("id", "2", "bday", "NOW"));
assertU(adoc("id", "3", "bday", "NOW/HOUR")); assertU(adoc("id", "3", "bday", "NOW/HOUR"));
assertU(adoc("id", "4", "bday", "NOW-30MINUTES")); assertU(adoc("id", "4", "bday", "NOW-30MINUTES"));
assertU(adoc("id", "5", "bday", "NOW+30MINUTES")); assertU(adoc("id", "5", "bday", "NOW+30MINUTES"));
assertU(adoc("id", "6", "bday", "NOW+2YEARS")); assertU(adoc("id", "6", "bday", "NOW+2YEARS"));
assertU(commit()); assertU(commit());
assertQ("check math on absolute date#1",
req("q", "bday:[* TO "+july4+"/SECOND]"),
"*[count(//doc)=0]");
assertQ("check math on absolute date#2",
req("q", "bday:[* TO "+july4+"/SECOND+1SECOND]"),
"*[count(//doc)=1]");
assertQ("check math on absolute date#3",
req("q", "bday:["+july4+"/SECOND TO "+july4+"/SECOND+1SECOND]"),
"*[count(//doc)=1]");
assertQ("check math on absolute date#4",
req("q", "bday:["+july4+"/MINUTE+1MINUTE TO *]"),
"*[count(//doc)=5]");
assertQ("check count for before now", assertQ("check count for before now",
req("q", "bday:[* TO NOW]"), "*[count(//doc)=4]"); req("q", "bday:[* TO NOW]"), "*[count(//doc)=4]");