SOLR-258: Date Faceting added to SimpleFacets

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@560686 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2007-07-29 06:28:41 +00:00
parent 4ac947d37b
commit 670b25d967
5 changed files with 415 additions and 20 deletions

View File

@ -113,6 +113,10 @@ New Features
20. SOLR-102: Added RegexFragmenter, which splits text for highlighting
based on a given pattern. (klaas)
21. SOLR-258: Date Faceting added to SimpleFacets. Facet counts
computed for ranges of size facet.date.gap (a DateMath expression)
between facet.date.start and facet.date.end. (hossman)
Changes in runtime behavior
Optimizations

View File

@ -121,7 +121,73 @@ public abstract class SolrParams {
* only use the filterCache for terms with a df >= to this parameter.
*/
public static final String FACET_ENUM_CACHE_MINDF = "facet.enum.cache.minDf";
/**
* Any field whose terms the user wants to enumerate over for
* Facet Contraint Counts (multi-value)
*/
public static final String FACET_DATE = "facet.date";
/**
* Date string indicating the starting point for a date facet range.
* Can be overriden on a per field basis.
*/
public static final String FACET_DATE_START = "facet.date.start";
/**
* Date string indicating the endinging point for a date facet range.
* Can be overriden on a per field basis.
*/
public static final String FACET_DATE_END = "facet.date.end";
/**
* Date Math string indicating the interval of sub-ranges for a date
* facet range.
* Can be overriden on a per field basis.
*/
public static final String FACET_DATE_GAP = "facet.date.gap";
/**
* Boolean indicating how counts should be computed if the range
* between 'start' and 'end' is not evenly divisible by 'gap'. If
* this value is true, then all counts of ranges involving the 'end'
* point will use the exact endpoint specified -- this includes the
* 'between' and 'after' counts as well as the last range computed
* using the 'gap'. If the value is false, then 'gap' is used to
* compute the effective endpoint closest to the 'end' param which
* results in the range between 'start' and 'end' being evenly
* divisible by 'gap'.
* The default is false.
* Can be overriden on a per field basis.
*/
public static final String FACET_DATE_HARD_END = "facet.date.hardend";
/**
* String indicating what "other" ranges should be computed for a
* date facet range (multi-value).
* Can be overriden on a per field basis.
* @see FacetDateOther
*/
public static final String FACET_DATE_OTHER = "facet.date.other";
/**
* An enumeration of the legal values for FACET_DATE_OTHER...
* <ul>
* <li>before = the count of matches before the start date</li>
* <li>after = the count of matches after the end date</li>
* <li>between = the count of all matches between start and end</li>
* <li>all = all of the above (default value)</li>
* <li>none = no additional info requested</li>
* </ul>
* @see #FACET_DATE_OTHER
*/
public enum FacetDateOther {
BEFORE, AFTER, BETWEEN, ALL, NONE;
public String toString() { return super.toString().toLowerCase(); }
public static FacetDateOther get(String label) {
try {
return valueOf(label.toUpperCase());
} catch (IllegalArgumentException e) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
label+" is not a valid type of 'other' date facet information",e);
}
}
}
/** If the content stream should come from a URL (using URLConnection) */
public static final String STREAM_URL = "stream.url";

View File

@ -25,6 +25,7 @@ import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.*;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.SolrParams.FacetDateOther;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
@ -33,12 +34,18 @@ import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.DateField;
import org.apache.solr.search.*;
import org.apache.solr.util.BoundedTreeSet;
import org.apache.solr.util.DateMathParser;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.Locale;
import java.util.Set;
import java.util.EnumSet;
/**
* A class that generates simple Facet information for a request.
@ -70,6 +77,7 @@ public class SimpleFacets {
*
* @see #getFacetQueryCounts
* @see #getFacetFieldCounts
* @see #getFacetDateCounts
* @see SolrParams#FACET
* @return a NamedList of Facet Count info or null
*/
@ -83,8 +91,8 @@ public class SimpleFacets {
try {
res.add("facet_queries", getFacetQueryCounts());
res.add("facet_fields", getFacetFieldCounts());
res.add("facet_dates", getFacetDateCounts());
} catch (Exception e) {
SolrException.logOnce(SolrCore.log, "Exception during facet counts", e);
@ -402,6 +410,148 @@ public class SimpleFacets {
return res;
}
/**
* Returns a list of value constraints and the associated facet counts
* for each facet date field, range, and interval specified in the
* SolrParams
*
* @see SolrParams#FACET_DATE
*/
public NamedList getFacetDateCounts()
throws IOException {
final SolrParams required = new RequiredSolrParams(params);
final NamedList resOuter = new SimpleOrderedMap();
final String[] fields = params.getParams(SolrParams.FACET_DATE);
final Date NOW = new Date();
if (null == fields || 0 == fields.length) return resOuter;
final IndexSchema schema = searcher.getSchema();
for (String f : fields) {
final NamedList resInner = new SimpleOrderedMap();
resOuter.add(f, resInner);
final FieldType trash = schema.getFieldType(f);
if (! (trash instanceof DateField)) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"Can not date facet on a field which is not a DateField: " + f);
}
final DateField ft = (DateField) trash;
final String startS
= required.getFieldParam(f,SolrParams.FACET_DATE_START);
final Date start;
try {
start = ft.parseMath(NOW, startS);
} catch (SolrException e) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"date facet 'start' is not a valid Date string: " + startS, e);
}
final String endS
= required.getFieldParam(f,SolrParams.FACET_DATE_END);
Date end; // not final, hardend may change this
try {
end = ft.parseMath(NOW, endS);
} catch (SolrException e) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"date facet 'end' is not a valid Date string: " + endS, e);
}
if (end.before(start)) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"date facet 'end' comes before 'start': "+endS+" < "+startS);
}
final String gap = required.getFieldParam(f,SolrParams.FACET_DATE_GAP);
final DateMathParser dmp = new DateMathParser(ft.UTC, Locale.US);
dmp.setNow(NOW);
try {
Date low = start;
while (low.before(end)) {
dmp.setNow(low);
final String lowI = ft.toInternal(low);
final String label = ft.indexedToReadable(lowI);
Date high = dmp.parseMath(gap);
if (end.before(high)) {
if (params.getFieldBool(f,SolrParams.FACET_DATE_HARD_END,false)) {
high = end;
} else {
end = high;
}
}
if (high.before(low)) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"date facet infinite loop (is gap negative?)");
}
final String highI = ft.toInternal(high);
resInner.add(label, rangeCount(f,lowI,highI,true,true));
low = high;
}
} catch (java.text.ParseException e) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"date facet 'gap' is not a valid Date Math string: " + gap, e);
}
// explicitly return the gap and end so all the counts are meaningful
resInner.add("gap", gap);
resInner.add("end", end);
final String[] othersP =
params.getFieldParams(f,SolrParams.FACET_DATE_OTHER);
if (null != othersP && 0 < othersP.length ) {
Set<FacetDateOther> others = EnumSet.noneOf(FacetDateOther.class);
for (final String o : othersP) {
others.add(FacetDateOther.get(o));
}
// no matter what other values are listed, we don't do
// anything if "none" is specified.
if (! others.contains(FacetDateOther.NONE) ) {
final String startI = ft.toInternal(start);
final String endI = ft.toInternal(end);
boolean all = others.contains(FacetDateOther.ALL);
if (all || others.contains(FacetDateOther.BEFORE)) {
resInner.add(FacetDateOther.BEFORE.toString(),
rangeCount(f,null,startI,false,false));
}
if (all || others.contains(FacetDateOther.AFTER)) {
resInner.add(FacetDateOther.AFTER.toString(),
rangeCount(f,endI,null,false,false));
}
if (all || others.contains(FacetDateOther.BETWEEN)) {
resInner.add(FacetDateOther.BETWEEN.toString(),
rangeCount(f,startI,endI,true,true));
}
}
}
}
return resOuter;
}
/**
* Macro for getting the numDocs of a ConstantScoreRangeQuery over docs
* @see docs
* @see SolrIndexSearcher#numDocs
* @see ConstantScoreRangeQuery
*/
protected int rangeCount(String field, String low, String high,
boolean iLow, boolean iHigh) throws IOException {
return searcher.numDocs(new ConstantScoreRangeQuery(field,low,high,
iHigh,iLow),
docs);
}
/**
* A simple key=>val pair whose natural order is such that
* <b>higher</b> vals come before lower vals.

View File

@ -70,9 +70,10 @@ import java.text.ParseException;
*
* <p>
* This FieldType also supports incoming "Date Math" strings for computing
* values by adding/rounding internals of time relative "NOW",
* ie: "NOW+1YEAR", "NOW/DAY", etc.. -- see {@link DateMathParser}
* for more examples.
* values by adding/rounding internals of time relative either an explicit
* datetime (in theformat specified above) or the literal string "NOW",
* ie: "NOW+1YEAR", "NOW/DAY", 1995-12-31T23:59:59.999Z+5MINUTES, etc...
* -- see {@link DateMathParser} for more examples.
* </p>
*
* @version $Id$
@ -91,20 +92,61 @@ public class DateField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
}
protected static String NOW = "NOW";
protected static char Z = 'Z';
public String toInternal(String val) {
int len=val.length();
if (val.charAt(len-1)=='Z') {
final int len=val.length();
if (val.charAt(len-1) == Z) {
// check common case first, simple datetime
// NOTE: not parsed to ensure correctness
return val.substring(0,len-1);
} else if (val.startsWith("NOW")) {
/* :TODO: let Locale/TimeZone come from init args for rounding only */
DateMathParser p = new DateMathParser(UTC, Locale.US);
try {
return toInternal(p.parseMath(val.substring(3)));
} catch (ParseException e) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid Date Math String:'" +val+'\'',e);
}
return toInternal(parseMath(null, val));
}
/**
* Parses a String which may be a date (in the standard format)
* followed by an optional math expression.
* @param now an optional fixed date to use as "NOW" in the DateMathParser
* @param val the string to parse
*/
public Date parseMath(Date now, String val) {
String math = null;
/* :TODO: let Locale/TimeZone come from init args for rounding only */
final DateMathParser p = new DateMathParser(UTC, Locale.US);
if (null != now) p.setNow(now);
if (val.startsWith(NOW)) {
math = val.substring(NOW.length());
} else {
final int zz = val.indexOf(Z);
if (0 < zz) {
math = val.substring(zz+1);
try {
p.setNow(toObject(val.substring(0,zz)));
} catch (ParseException e) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
"Invalid Date in Date Math String:'"
+val+'\'',e);
}
} else {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
"Invalid Date String:'" +val+'\'');
}
}
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid Date String:'" +val+'\'');
if (null == math || math.equals("")) {
return p.getNow();
}
try {
return p.parseMath(math);
} catch (ParseException e) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
"Invalid Date Math String:'" +val+'\'',e);
}
}
public String toInternal(Date val) {
@ -112,12 +154,15 @@ public class DateField extends FieldType {
}
public String indexedToReadable(String indexedForm) {
return indexedForm + 'Z';
return indexedForm + Z;
}
public String toExternal(Fieldable f) {
return indexedToReadable(f.stringValue());
}
public Date toObject(String indexedForm) throws java.text.ParseException {
return getThreadLocalDateFormat().parse(indexedToReadable(indexedForm));
}
@Override
public Date toObject(Fieldable f) {

View File

@ -612,6 +612,122 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
}
public void testDateFacets() {
final String f = "bday";
final String pre = "//lst[@name='facet_dates']/lst[@name='"+f+"']";
assertU(adoc("id", "1", f, "1976-07-04T12:08:56.235Z"));
assertU(adoc("id", "2", f, "1976-07-05T00:00:00.000Z"));
assertU(adoc("id", "3", f, "1976-07-15T00:07:67.890Z"));
assertU(adoc("id", "4", f, "1976-07-21T00:07:67.890Z"));
assertU(adoc("id", "5", f, "1976-07-13T12:12:25.255Z"));
assertU(adoc("id", "6", f, "1976-07-03T17:01:23.456Z"));
assertU(adoc("id", "7", f, "1976-07-12T12:12:25.255Z"));
assertU(adoc("id", "8", f, "1976-07-15T15:15:15.155Z"));
assertU(adoc("id", "9", f, "1907-07-12T13:13:23.235Z"));
assertU(adoc("id", "10", f, "1976-07-03T11:02:45.678Z"));
assertU(adoc("id", "11", f, "1907-07-12T12:12:25.255Z"));
assertU(adoc("id", "12", f, "2007-07-30T07:07:07.070Z"));
assertU(adoc("id", "13", f, "1976-07-30T22:22:22.222Z"));
assertU(adoc("id", "14", f, "1976-07-05T22:22:22.222Z"));
assertU(commit());
assertQ("check counts for month of facet by day",
req( "q", "*:*"
,"rows", "0"
,"facet", "true"
,"facet.date", f
,"facet.date.start", "1976-07-01T00:00:00.000Z"
,"facet.date.end", "1976-07-01T00:00:00.000Z+1MONTH"
,"facet.date.gap", "+1DAY"
,"facet.date.other", "all"
)
// 31 days + pre+post+inner = 34
,"*[count("+pre+"/int)=34]"
,pre+"/int[@name='1976-07-01T00:00:00.000Z'][.='0' ]"
,pre+"/int[@name='1976-07-02T00:00:00.000Z'][.='0' ]"
,pre+"/int[@name='1976-07-03T00:00:00.000Z'][.='2' ]"
// july4th = 2 because exists doc @ 00:00:00.000 on July5
// (date faceting is inclusive)
,pre+"/int[@name='1976-07-04T00:00:00.000Z'][.='2' ]"
,pre+"/int[@name='1976-07-05T00:00:00.000Z'][.='2' ]"
,pre+"/int[@name='1976-07-06T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-07T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-08T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-09T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-10T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-11T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-12T00:00:00.000Z'][.='1' ]"
,pre+"/int[@name='1976-07-13T00:00:00.000Z'][.='1' ]"
,pre+"/int[@name='1976-07-14T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-15T00:00:00.000Z'][.='2' ]"
,pre+"/int[@name='1976-07-16T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-17T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-18T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-19T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-21T00:00:00.000Z'][.='1' ]"
,pre+"/int[@name='1976-07-22T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-23T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-24T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-25T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-26T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-27T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-28T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-29T00:00:00.000Z'][.='0']"
,pre+"/int[@name='1976-07-30T00:00:00.000Z'][.='1' ]"
,pre+"/int[@name='1976-07-31T00:00:00.000Z'][.='0']"
,pre+"/int[@name='before' ][.='2']"
,pre+"/int[@name='after' ][.='1']"
,pre+"/int[@name='between'][.='11']"
);
assertQ("check hardend=false",
req( "q", "*:*"
,"rows", "0"
,"facet", "true"
,"facet.date", f
,"facet.date.start", "1976-07-01T00:00:00.000Z"
,"facet.date.end", "1976-07-13T00:00:00.000Z"
,"facet.date.gap", "+5DAYS"
,"facet.date.other", "all"
,"facet.date.hardend","false"
)
// 3 gaps + pre+post+inner = 6
,"*[count("+pre+"/int)=6]"
,pre+"/int[@name='1976-07-01T00:00:00.000Z'][.='5' ]"
,pre+"/int[@name='1976-07-06T00:00:00.000Z'][.='0' ]"
,pre+"/int[@name='1976-07-11T00:00:00.000Z'][.='4' ]"
,pre+"/int[@name='before' ][.='2']"
,pre+"/int[@name='after' ][.='3']"
,pre+"/int[@name='between'][.='9']"
);
assertQ("check hardend=true",
req( "q", "*:*"
,"rows", "0"
,"facet", "true"
,"facet.date", f
,"facet.date.start", "1976-07-01T00:00:00.000Z"
,"facet.date.end", "1976-07-13T00:00:00.000Z"
,"facet.date.gap", "+5DAYS"
,"facet.date.other", "all"
,"facet.date.hardend","true"
)
// 3 gaps + pre+post+inner = 6
,"*[count("+pre+"/int)=6]"
,pre+"/int[@name='1976-07-01T00:00:00.000Z'][.='5' ]"
,pre+"/int[@name='1976-07-06T00:00:00.000Z'][.='0' ]"
,pre+"/int[@name='1976-07-11T00:00:00.000Z'][.='1' ]"
,pre+"/int[@name='before' ][.='2']"
,pre+"/int[@name='after' ][.='6']"
,pre+"/int[@name='between'][.='6']"
);
}
public void testFacetMultiValued() {
doFacets("t_s");
@ -1129,7 +1245,8 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
// BUT: we can test that crazy combinations of "NOW" all work correctly,
// assuming the test doesn't take too long to run...
assertU(adoc("id", "1", "bday", "1976-07-04T12:08:56.235Z"));
final String july4 = "1976-07-04T12:08:56.235Z";
assertU(adoc("id", "1", "bday", july4));
assertU(adoc("id", "2", "bday", "NOW"));
assertU(adoc("id", "3", "bday", "NOW/HOUR"));
assertU(adoc("id", "4", "bday", "NOW-30MINUTES"));
@ -1137,6 +1254,19 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
assertU(adoc("id", "6", "bday", "NOW+2YEARS"));
assertU(commit());
assertQ("check math on absolute date#1",
req("q", "bday:[* TO "+july4+"/SECOND]"),
"*[count(//doc)=0]");
assertQ("check math on absolute date#2",
req("q", "bday:[* TO "+july4+"/SECOND+1SECOND]"),
"*[count(//doc)=1]");
assertQ("check math on absolute date#3",
req("q", "bday:["+july4+"/SECOND TO "+july4+"/SECOND+1SECOND]"),
"*[count(//doc)=1]");
assertQ("check math on absolute date#4",
req("q", "bday:["+july4+"/MINUTE+1MINUTE TO *]"),
"*[count(//doc)=5]");
assertQ("check count for before now",
req("q", "bday:[* TO NOW]"), "*[count(//doc)=4]");