From 07d361816e3d3130d107959da14c1570f7fefccd Mon Sep 17 00:00:00 2001 From: kimchy Date: Sun, 2 Jan 2011 15:07:45 +0200 Subject: [PATCH] Search: Date Histogram Facet, closes #591. --- .../facet/HistogramFacetSearchBenchmark.java | 14 + .../search/facet/FacetBuilders.java | 5 + .../search/facet/FacetModule.java | 2 + .../search/facet/TransportFacetModule.java | 2 + .../CountDateHistogramFacetCollector.java | 131 ++++++++ .../datehistogram/DateHistogramFacet.java | 159 ++++++++++ .../DateHistogramFacetBuilder.java | 191 ++++++++++++ .../DateHistogramFacetProcessor.java | 235 +++++++++++++++ ...ternalCountAndTotalDateHistogramFacet.java | 283 ++++++++++++++++++ .../InternalCountDateHistogramFacet.java | 250 ++++++++++++++++ .../InternalDateHistogramFacet.java | 38 +++ .../ValueDateHistogramFacetCollector.java | 121 ++++++++ ...alueScriptDateHistogramFacetCollector.java | 161 ++++++++++ .../search/facet/SimpleFacetsTests.java | 95 ++++++ 14 files changed, 1687 insertions(+) create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/CountDateHistogramFacetCollector.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/DateHistogramFacet.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/DateHistogramFacetBuilder.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/DateHistogramFacetProcessor.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/InternalCountAndTotalDateHistogramFacet.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/InternalCountDateHistogramFacet.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/InternalDateHistogramFacet.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueDateHistogramFacetCollector.java create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueScriptDateHistogramFacetCollector.java diff --git a/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/HistogramFacetSearchBenchmark.java b/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/HistogramFacetSearchBenchmark.java index c47b8701ae5..86f768accfd 100644 --- a/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/HistogramFacetSearchBenchmark.java +++ b/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/HistogramFacetSearchBenchmark.java @@ -129,6 +129,7 @@ public class HistogramFacetSearchBenchmark { System.err.println("--> mismatch on hits"); } } + System.out.println("--> Warmup DONE"); long totalQueryTime = 0; for (int j = 0; j < QUERY_COUNT; j++) { @@ -183,6 +184,19 @@ public class HistogramFacetSearchBenchmark { System.out.println("--> Histogram Facet (date/l_value) " + (totalQueryTime / QUERY_COUNT) + "ms"); + totalQueryTime = 0; + for (int j = 0; j < QUERY_COUNT; j++) { + SearchResponse searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(dateHistogramFacet("date").field("date").interval("day")) + .execute().actionGet(); + if (searchResponse.hits().totalHits() != COUNT) { + System.err.println("--> mismatch on hits"); + } + totalQueryTime += searchResponse.tookInMillis(); + } + System.out.println("--> Date Histogram Facet (date) " + (totalQueryTime / QUERY_COUNT) + "ms"); + clientNode.close(); node1.close(); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/FacetBuilders.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/FacetBuilders.java index f1570a2c6cd..ed97364b703 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/FacetBuilders.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/FacetBuilders.java @@ -21,6 +21,7 @@ package org.elasticsearch.search.facet; import org.elasticsearch.index.query.xcontent.XContentFilterBuilder; import org.elasticsearch.index.query.xcontent.XContentQueryBuilder; +import org.elasticsearch.search.facet.datehistogram.DateHistogramFacetBuilder; import org.elasticsearch.search.facet.filter.FilterFacetBuilder; import org.elasticsearch.search.facet.geodistance.GeoDistanceFacetBuilder; import org.elasticsearch.search.facet.histogram.HistogramFacetBuilder; @@ -69,6 +70,10 @@ public class FacetBuilders { return new HistogramFacetBuilder(facetName); } + public static DateHistogramFacetBuilder dateHistogramFacet(String facetName) { + return new DateHistogramFacetBuilder(facetName); + } + public static HistogramScriptFacetBuilder histogramScriptFacet(String facetName) { return new HistogramScriptFacetBuilder(facetName); } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/FacetModule.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/FacetModule.java index 01e9b9abb51..bc01d6daed3 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/FacetModule.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/FacetModule.java @@ -22,6 +22,7 @@ package org.elasticsearch.search.facet; import org.elasticsearch.common.collect.Lists; import org.elasticsearch.common.inject.AbstractModule; import org.elasticsearch.common.inject.multibindings.Multibinder; +import org.elasticsearch.search.facet.datehistogram.DateHistogramFacetProcessor; import org.elasticsearch.search.facet.filter.FilterFacetProcessor; import org.elasticsearch.search.facet.geodistance.GeoDistanceFacetProcessor; import org.elasticsearch.search.facet.histogram.HistogramFacetProcessor; @@ -44,6 +45,7 @@ public class FacetModule extends AbstractModule { processors.add(QueryFacetProcessor.class); processors.add(GeoDistanceFacetProcessor.class); processors.add(HistogramFacetProcessor.class); + processors.add(DateHistogramFacetProcessor.class); processors.add(RangeFacetProcessor.class); processors.add(StatisticalFacetProcessor.class); processors.add(TermsFacetProcessor.class); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/TransportFacetModule.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/TransportFacetModule.java index a22bb5b054c..c831dc79f8c 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/TransportFacetModule.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/TransportFacetModule.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.facet; import org.elasticsearch.common.inject.AbstractModule; +import org.elasticsearch.search.facet.datehistogram.InternalDateHistogramFacet; import org.elasticsearch.search.facet.filter.InternalFilterFacet; import org.elasticsearch.search.facet.geodistance.InternalGeoDistanceFacet; import org.elasticsearch.search.facet.histogram.InternalHistogramFacet; @@ -38,6 +39,7 @@ public class TransportFacetModule extends AbstractModule { InternalQueryFacet.registerStreams(); InternalGeoDistanceFacet.registerStreams(); InternalHistogramFacet.registerStreams(); + InternalDateHistogramFacet.registerStreams(); InternalRangeFacet.registerStreams(); InternalStatisticalFacet.registerStreams(); InternalTermsFacet.registerStreams(); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/CountDateHistogramFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/CountDateHistogramFacetCollector.java new file mode 100644 index 00000000000..f274cf677a0 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/CountDateHistogramFacetCollector.java @@ -0,0 +1,131 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.datehistogram; + +import org.apache.lucene.index.IndexReader; +import org.elasticsearch.common.joda.time.MutableDateTime; +import org.elasticsearch.common.trove.TLongLongHashMap; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.longs.LongFieldData; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.search.facet.AbstractFacetCollector; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.FacetPhaseExecutionException; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; + +/** + * A date histogram facet collector that uses the same field as the key as well as the + * value. + * + * @author kimchy (shay.banon) + */ +public class CountDateHistogramFacetCollector extends AbstractFacetCollector { + + private final String indexFieldName; + + private final MutableDateTime dateTime; + + private final DateHistogramFacet.ComparatorType comparatorType; + + private final FieldDataCache fieldDataCache; + + private final FieldDataType fieldDataType; + + private LongFieldData fieldData; + + private final DateHistogramProc histoProc; + + public CountDateHistogramFacetCollector(String facetName, String fieldName, MutableDateTime dateTime, long interval, DateHistogramFacet.ComparatorType comparatorType, SearchContext context) { + super(facetName); + this.dateTime = dateTime; + this.comparatorType = comparatorType; + this.fieldDataCache = context.fieldDataCache(); + + MapperService.SmartNameFieldMappers smartMappers = context.mapperService().smartName(fieldName); + if (smartMappers == null || !smartMappers.hasMapper()) { + throw new FacetPhaseExecutionException(facetName, "No mapping found for field [" + fieldName + "]"); + } + + // add type filter if there is exact doc mapper associated with it + if (smartMappers.hasDocMapper()) { + setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); + } + + FieldMapper mapper = smartMappers.mapper(); + + indexFieldName = mapper.names().indexName(); + fieldDataType = mapper.fieldDataType(); + + if (interval == 1) { + histoProc = new DateHistogramProc(); + } else { + histoProc = new IntervalDateHistogramProc(interval); + } + } + + @Override protected void doCollect(int doc) throws IOException { + fieldData.forEachValueInDoc(doc, dateTime, histoProc); + } + + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + } + + @Override public Facet facet() { + return new InternalCountDateHistogramFacet(facetName, comparatorType, histoProc.counts()); + } + + public static long bucket(long value, long interval) { + return ((value / interval) * interval); + } + + public static class DateHistogramProc implements LongFieldData.DateValueInDocProc { + + protected final TLongLongHashMap counts = new TLongLongHashMap(); + + @Override public void onValue(int docId, MutableDateTime dateTime) { + counts.adjustOrPutValue(dateTime.getMillis(), 1, 1); + } + + public TLongLongHashMap counts() { + return counts; + } + } + + public static class IntervalDateHistogramProc extends DateHistogramProc { + + private final TLongLongHashMap counts = new TLongLongHashMap(); + + private final long interval; + + public IntervalDateHistogramProc(long interval) { + this.interval = interval; + } + + @Override public void onValue(int docId, MutableDateTime dateTime) { + long bucket = bucket(dateTime.getMillis(), interval); + counts.adjustOrPutValue(bucket, 1, 1); + } + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/DateHistogramFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/DateHistogramFacet.java new file mode 100644 index 00000000000..7ea2aa42700 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/DateHistogramFacet.java @@ -0,0 +1,159 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.datehistogram; + +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.search.facet.Facet; + +import java.util.Comparator; +import java.util.List; + +/** + * A date histogram facet. + * + * @author kimchy (shay.banon) + */ +public interface DateHistogramFacet extends Facet, Iterable { + + /** + * The type of the filter facet. + */ + public static final String TYPE = "date_histogram"; + + /** + * An ordered list of histogram facet entries. + */ + List entries(); + + /** + * An ordered list of histogram facet entries. + */ + List getEntries(); + + public static enum ComparatorType { + TIME((byte) 0, "time", new Comparator() { + + @Override public int compare(Entry o1, Entry o2) { + return (o1.time() < o2.time() ? -1 : (o1.time() == o2.time() ? 0 : 1)); + } + }), + COUNT((byte) 1, "count", new Comparator() { + + @Override public int compare(Entry o1, Entry o2) { + return (o1.count() < o2.count() ? -1 : (o1.count() == o2.count() ? 0 : 1)); + } + }), + TOTAL((byte) 2, "total", new Comparator() { + + @Override public int compare(Entry o1, Entry o2) { + return (o1.total() < o2.total() ? -1 : (o1.total() == o2.total() ? 0 : 1)); + } + }); + + private final byte id; + + private final String description; + + private final Comparator comparator; + + ComparatorType(byte id, String description, Comparator comparator) { + this.id = id; + this.description = description; + this.comparator = comparator; + } + + public byte id() { + return this.id; + } + + public String description() { + return this.description; + } + + public Comparator comparator() { + return comparator; + } + + public static ComparatorType fromId(byte id) { + if (id == 0) { + return TIME; + } else if (id == 1) { + return COUNT; + } else if (id == 2) { + return TOTAL; + } + throw new ElasticSearchIllegalArgumentException("No type argument match for histogram comparator [" + id + "]"); + } + + public static ComparatorType fromString(String type) { + if ("time".equals(type)) { + return TIME; + } else if ("count".equals(type)) { + return COUNT; + } else if ("total".equals(type)) { + return TOTAL; + } + throw new ElasticSearchIllegalArgumentException("No type argument match for histogram comparator [" + type + "]"); + } + } + + public interface Entry { + + /** + * The time bucket start (in milliseconds). + */ + long time(); + + /** + * The time bucket start (in milliseconds). + */ + long getTime(); + + /** + * The number of hits that fall within that key "range" or "interval". + */ + long count(); + + /** + * The number of hits that fall within that key "range" or "interval". + */ + long getCount(); + + /** + * The sum / total of the value field that fall within this key "interval". + */ + double total(); + + /** + * The sum / total of the value field that fall within this key "interval". + */ + double getTotal(); + + /** + * The mean of this facet interval. + */ + double mean(); + + /** + * The mean of this facet interval. + */ + double getMean(); + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/DateHistogramFacetBuilder.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/DateHistogramFacetBuilder.java new file mode 100644 index 00000000000..cddac397797 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/DateHistogramFacetBuilder.java @@ -0,0 +1,191 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.datehistogram; + +import org.elasticsearch.common.collect.Maps; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.query.xcontent.XContentFilterBuilder; +import org.elasticsearch.search.builder.SearchSourceBuilderException; +import org.elasticsearch.search.facet.AbstractFacetBuilder; + +import java.io.IOException; +import java.util.Map; + +/** + * A facet builder of date histogram facets. + * + * @author kimchy (shay.banon) + */ +public class DateHistogramFacetBuilder extends AbstractFacetBuilder { + private String keyFieldName; + private String valueFieldName; + private String interval = null; + private String zone = null; + private DateHistogramFacet.ComparatorType comparatorType; + + private String valueScript; + private Map params; + private String lang; + + /** + * Constructs a new date histogram facet with the provided facet logical name. + * + * @param name The logical name of the facet + */ + public DateHistogramFacetBuilder(String name) { + super(name); + } + + /** + * The field name to perform the histogram facet. Translates to perform the histogram facet + * using the provided field as both the {@link #keyField(String)} and {@link #valueField(String)}. + */ + public DateHistogramFacetBuilder field(String field) { + this.keyFieldName = field; + return this; + } + + /** + * The field name to use in order to control where the hit will "fall into" within the histogram + * entries. Essentially, using the key field numeric value, the hit will be "rounded" into the relevant + * bucket controlled by the interval. + */ + public DateHistogramFacetBuilder keyField(String keyField) { + this.keyFieldName = keyField; + return this; + } + + /** + * The field name to use as the value of the hit to compute data based on values within the interval + * (for example, total). + */ + public DateHistogramFacetBuilder valueField(String valueField) { + this.valueFieldName = valueField; + return this; + } + + public DateHistogramFacetBuilder valueScript(String valueScript) { + this.valueScript = valueScript; + return this; + } + + public DateHistogramFacetBuilder param(String name, Object value) { + if (params == null) { + params = Maps.newHashMap(); + } + params.put(name, value); + return this; + } + + /** + * The language of the value script. + */ + public DateHistogramFacetBuilder lang(String lang) { + this.lang = lang; + return this; + } + + /** + * The interval used to control the bucket "size" where each key value of a hit will fall into. Check + * the docs for all available values. + */ + public DateHistogramFacetBuilder interval(String interval) { + this.interval = interval; + return this; + } + + /** + * Sets the time zone to use when bucketing the values. Can either be in the form of "-10:00" or + * one of the values listed here: http://joda-time.sourceforge.net/timezones.html. + */ + public DateHistogramFacetBuilder zone(String zone) { + this.zone = zone; + return this; + } + + public DateHistogramFacetBuilder comparator(DateHistogramFacet.ComparatorType comparatorType) { + this.comparatorType = comparatorType; + return this; + } + + /** + * Should the facet run in global mode (not bounded by the search query) or not (bounded by + * the search query). Defaults to false. + */ + public DateHistogramFacetBuilder global(boolean global) { + super.global(global); + return this; + } + + /** + * Marks the facet to run in a specific scope. + */ + @Override public DateHistogramFacetBuilder scope(String scope) { + super.scope(scope); + return this; + } + + /** + * An additional filter used to further filter down the set of documents the facet will run on. + */ + public DateHistogramFacetBuilder facetFilter(XContentFilterBuilder filter) { + this.facetFilter = filter; + return this; + } + + @Override public void toXContent(XContentBuilder builder, Params params) throws IOException { + if (keyFieldName == null) { + throw new SearchSourceBuilderException("field must be set on date histogram facet for facet [" + name + "]"); + } + if (interval == null) { + throw new SearchSourceBuilderException("interval must be set on date histogram facet for facet [" + name + "]"); + } + builder.startObject(name); + + builder.startObject(DateHistogramFacet.TYPE); + if (valueFieldName != null) { + builder.field("key_field", keyFieldName); + builder.field("value_field", valueFieldName); + } else { + builder.field("field", keyFieldName); + } + if (valueScript != null) { + builder.field("value_script", valueScript); + if (lang != null) { + builder.field("lang", lang); + } + if (this.params != null) { + builder.field("params", this.params); + } + } + builder.field("interval", interval); + if (zone != null) { + builder.field("time_zone", zone); + } + if (comparatorType != null) { + builder.field("comparator", comparatorType.description()); + } + builder.endObject(); + + addFilterFacetAndGlobal(builder, params); + + builder.endObject(); + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/DateHistogramFacetProcessor.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/DateHistogramFacetProcessor.java new file mode 100644 index 00000000000..144d4f4b932 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/DateHistogramFacetProcessor.java @@ -0,0 +1,235 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.datehistogram; + +import org.elasticsearch.common.collect.ImmutableMap; +import org.elasticsearch.common.collect.MapBuilder; +import org.elasticsearch.common.component.AbstractComponent; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.joda.time.Chronology; +import org.elasticsearch.common.joda.time.DateTimeField; +import org.elasticsearch.common.joda.time.DateTimeZone; +import org.elasticsearch.common.joda.time.MutableDateTime; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.trove.ExtTObjectIntHasMap; +import org.elasticsearch.common.trove.TObjectIntHashMap; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.FacetCollector; +import org.elasticsearch.search.facet.FacetPhaseExecutionException; +import org.elasticsearch.search.facet.FacetProcessor; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +/** + * @author kimchy (shay.banon) + */ +public class DateHistogramFacetProcessor extends AbstractComponent implements FacetProcessor { + + private final ImmutableMap dateFieldParsers; + private final TObjectIntHashMap rounding = new ExtTObjectIntHasMap().defaultReturnValue(MutableDateTime.ROUND_FLOOR); + + @Inject public DateHistogramFacetProcessor(Settings settings) { + super(settings); + InternalDateHistogramFacet.registerStreams(); + + dateFieldParsers = MapBuilder.newMapBuilder() + .put("year", new DateFieldParser.YearOfCentury()) + .put("1y", new DateFieldParser.YearOfCentury()) + .put("month", new DateFieldParser.MonthOfYear()) + .put("1m", new DateFieldParser.MonthOfYear()) + .put("day", new DateFieldParser.DayOfMonth()) + .put("1d", new DateFieldParser.DayOfMonth()) + .put("hour", new DateFieldParser.HourOfDay()) + .put("1h", new DateFieldParser.HourOfDay()) + .put("minute", new DateFieldParser.MinuteOfHour()) + .put("1m", new DateFieldParser.MinuteOfHour()) + .put("second", new DateFieldParser.SecondOfMinute()) + .put("1s", new DateFieldParser.SecondOfMinute()) + .immutableMap(); + + rounding.put("floor", MutableDateTime.ROUND_FLOOR); + rounding.put("ceiling", MutableDateTime.ROUND_CEILING); + rounding.put("half_even", MutableDateTime.ROUND_HALF_EVEN); + rounding.put("halfEven", MutableDateTime.ROUND_HALF_EVEN); + rounding.put("half_floor", MutableDateTime.ROUND_HALF_FLOOR); + rounding.put("halfFloor", MutableDateTime.ROUND_HALF_FLOOR); + rounding.put("half_ceiling", MutableDateTime.ROUND_HALF_CEILING); + rounding.put("halfCeiling", MutableDateTime.ROUND_HALF_CEILING); + } + + @Override public String[] types() { + return new String[]{DateHistogramFacet.TYPE, "dateHistogram"}; + } + + @Override public FacetCollector parse(String facetName, XContentParser parser, SearchContext context) throws IOException { + String keyField = null; + String valueField = null; + String valueScript = null; + String scriptLang = null; + Map params = null; + boolean intervalSet = false; + long interval = 1; + String sInterval = null; + MutableDateTime dateTime = new MutableDateTime(DateTimeZone.UTC); + DateHistogramFacet.ComparatorType comparatorType = DateHistogramFacet.ComparatorType.TIME; + XContentParser.Token token; + String fieldName = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_OBJECT) { + if ("params".equals(fieldName)) { + params = parser.map(); + } + } else if (token.isValue()) { + if ("field".equals(fieldName)) { + keyField = parser.text(); + } else if ("key_field".equals(fieldName) || "keyField".equals(fieldName)) { + keyField = parser.text(); + } else if ("value_field".equals(fieldName) || "valueField".equals(fieldName)) { + valueField = parser.text(); + } else if ("interval".equals(fieldName)) { + intervalSet = true; + if (token == XContentParser.Token.VALUE_NUMBER) { + interval = parser.longValue(); + } else { + sInterval = parser.text(); + } + } else if ("time_zone".equals(fieldName) || "timeZone".equals(fieldName)) { + if (token == XContentParser.Token.VALUE_NUMBER) { + dateTime.setZone(DateTimeZone.forOffsetHours(parser.intValue())); + } else { + String text = parser.text(); + int index = text.indexOf(':'); + if (index != -1) { + // format like -02:30 + dateTime.setZone(DateTimeZone.forOffsetHoursMinutes( + Integer.parseInt(text.substring(0, index)), + Integer.parseInt(text.substring(index + 1)) + )); + } else { + // id, listed here: http://joda-time.sourceforge.net/timezones.html + dateTime.setZone(DateTimeZone.forID(text)); + } + } + } else if ("value_script".equals(fieldName) || "valueScript".equals(fieldName)) { + valueScript = parser.text(); + } else if ("order".equals(fieldName) || "comparator".equals(fieldName)) { + comparatorType = DateHistogramFacet.ComparatorType.fromString(parser.text()); + } else if ("lang".equals(fieldName)) { + scriptLang = parser.text(); + } + } + } + + if (keyField == null) { + throw new FacetPhaseExecutionException(facetName, "key field is required to be set for histogram facet, either using [field] or using [key_field]"); + } + + FieldMapper mapper = context.mapperService().smartNameFieldMapper(keyField); + if (mapper.fieldDataType() != FieldDataType.DefaultTypes.LONG) { + throw new FacetPhaseExecutionException(facetName, "(key) field [" + keyField + "] is not of type date"); + } + + if (!intervalSet) { + throw new FacetPhaseExecutionException(facetName, "[interval] is required to be set for histogram facet"); + } + + // we set the rounding after we set the zone, for it to take affect + if (sInterval != null) { + int index = sInterval.indexOf(':'); + if (index != -1) { + // set with rounding + DateTimeField field = dateFieldParsers.get(sInterval.substring(0, index)).parse(dateTime.getChronology()); + dateTime.setRounding(field, rounding.get(sInterval.substring(index + 1))); + } else { + DateTimeField field = dateFieldParsers.get(sInterval).parse(dateTime.getChronology()); + if (field != null) { + dateTime.setRounding(field, MutableDateTime.ROUND_FLOOR); + } else { + // time interval + interval = TimeValue.parseTimeValue(parser.text(), null).millis(); + } + } + } + + + if (valueScript != null) { + return new ValueScriptDateHistogramFacetCollector(facetName, keyField, scriptLang, valueScript, params, dateTime, interval, comparatorType, context); + } else if (valueField == null) { + return new CountDateHistogramFacetCollector(facetName, keyField, dateTime, interval, comparatorType, context); + } else { + return new ValueDateHistogramFacetCollector(facetName, keyField, valueField, dateTime, interval, comparatorType, context); + } + } + + @Override public Facet reduce(String name, List facets) { + InternalDateHistogramFacet first = (InternalDateHistogramFacet) facets.get(0); + return first.reduce(name, facets); + } + + static interface DateFieldParser { + + DateTimeField parse(Chronology chronology); + + static class YearOfCentury implements DateFieldParser { + @Override public DateTimeField parse(Chronology chronology) { + return chronology.yearOfCentury(); + } + } + + static class MonthOfYear implements DateFieldParser { + @Override public DateTimeField parse(Chronology chronology) { + return chronology.monthOfYear(); + } + } + + static class DayOfMonth implements DateFieldParser { + @Override public DateTimeField parse(Chronology chronology) { + return chronology.dayOfMonth(); + } + } + + static class HourOfDay implements DateFieldParser { + @Override public DateTimeField parse(Chronology chronology) { + return chronology.hourOfDay(); + } + } + + static class MinuteOfHour implements DateFieldParser { + @Override public DateTimeField parse(Chronology chronology) { + return chronology.minuteOfHour(); + } + } + + static class SecondOfMinute implements DateFieldParser { + @Override public DateTimeField parse(Chronology chronology) { + return chronology.secondOfMinute(); + } + } + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/InternalCountAndTotalDateHistogramFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/InternalCountAndTotalDateHistogramFacet.java new file mode 100644 index 00000000000..17516618f38 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/InternalCountAndTotalDateHistogramFacet.java @@ -0,0 +1,283 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.datehistogram; + +import org.elasticsearch.common.collect.ImmutableList; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.trove.TLongDoubleHashMap; +import org.elasticsearch.common.trove.TLongDoubleIterator; +import org.elasticsearch.common.trove.TLongLongHashMap; +import org.elasticsearch.common.trove.TLongLongIterator; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentBuilderString; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.histogram.HistogramFacet; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.TreeSet; + +/** + * @author kimchy (shay.banon) + */ +public class InternalCountAndTotalDateHistogramFacet extends InternalDateHistogramFacet { + + private static final String STREAM_TYPE = "ctdHistogram"; + + public static void registerStreams() { + Streams.registerStream(STREAM, STREAM_TYPE); + } + + static Stream STREAM = new Stream() { + @Override public Facet readFacet(String type, StreamInput in) throws IOException { + return readHistogramFacet(in); + } + }; + + @Override public String streamType() { + return STREAM_TYPE; + } + + + /** + * A histogram entry representing a single entry within the result of a histogram facet. + */ + public class CountAndTotalEntry implements Entry { + private final long time; + private final long count; + private final double total; + + public CountAndTotalEntry(long time, long count, double total) { + this.time = time; + this.count = count; + this.total = total; + } + + public long time() { + return time; + } + + public long getTime() { + return time(); + } + + public long count() { + return count; + } + + public long getCount() { + return count(); + } + + public double total() { + return total; + } + + public double getTotal() { + return total(); + } + + public double mean() { + return total / count; + } + + public double getMean() { + return mean(); + } + } + + private String name; + + private ComparatorType comparatorType; + + TLongLongHashMap counts; + + TLongDoubleHashMap totals; + + Collection entries = null; + + private InternalCountAndTotalDateHistogramFacet() { + } + + public InternalCountAndTotalDateHistogramFacet(String name, ComparatorType comparatorType, TLongLongHashMap counts, TLongDoubleHashMap totals) { + this.name = name; + this.comparatorType = comparatorType; + this.counts = counts; + this.totals = totals; + } + + @Override public String name() { + return this.name; + } + + @Override public String getName() { + return name(); + } + + @Override public String type() { + return TYPE; + } + + @Override public String getType() { + return type(); + } + + @Override public List entries() { + computeEntries(); + if (!(entries instanceof List)) { + entries = ImmutableList.copyOf(entries); + } + return (List) entries; + } + + @Override public List getEntries() { + return entries(); + } + + @Override public Iterator iterator() { + return (Iterator) computeEntries().iterator(); + } + + private Collection computeEntries() { + if (entries != null) { + return entries; + } + TreeSet set = new TreeSet(comparatorType.comparator()); + for (TLongLongIterator it = counts.iterator(); it.hasNext();) { + it.advance(); + set.add(new CountAndTotalEntry(it.key(), it.value(), totals.get(it.key()))); + } + entries = set; + return entries; + } + + @Override public Facet reduce(String name, List facets) { + if (facets.size() == 1) { + return facets.get(0); + } + TLongLongHashMap counts = null; + TLongDoubleHashMap totals = null; + + InternalCountAndTotalDateHistogramFacet firstHistoFacet = (InternalCountAndTotalDateHistogramFacet) facets.get(0); + for (Facet facet : facets) { + InternalCountAndTotalDateHistogramFacet histoFacet = (InternalCountAndTotalDateHistogramFacet) facet; + if (!histoFacet.counts.isEmpty()) { + if (counts == null) { + counts = histoFacet.counts; + } else { + for (TLongLongIterator it = histoFacet.counts.iterator(); it.hasNext();) { + it.advance(); + counts.adjustOrPutValue(it.key(), it.value(), it.value()); + } + } + } + + if (!histoFacet.totals.isEmpty()) { + if (totals == null) { + totals = histoFacet.totals; + } else { + for (TLongDoubleIterator it = histoFacet.totals.iterator(); it.hasNext();) { + it.advance(); + totals.adjustOrPutValue(it.key(), it.value(), it.value()); + } + } + } + } + if (counts == null) { + counts = InternalCountAndTotalDateHistogramFacet.EMPTY_LONG_LONG_MAP; + } + if (totals == null) { + totals = InternalCountAndTotalDateHistogramFacet.EMPTY_LONG_DOUBLE_MAP; + } + firstHistoFacet.counts = counts; + firstHistoFacet.totals = totals; + + return firstHistoFacet; + } + + static final class Fields { + static final XContentBuilderString _TYPE = new XContentBuilderString("_type"); + static final XContentBuilderString ENTRIES = new XContentBuilderString("entries"); + static final XContentBuilderString TIME = new XContentBuilderString("time"); + static final XContentBuilderString COUNT = new XContentBuilderString("count"); + static final XContentBuilderString TOTAL = new XContentBuilderString("total"); + static final XContentBuilderString MEAN = new XContentBuilderString("mean"); + } + + @Override public void toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(name); + builder.field(Fields._TYPE, HistogramFacet.TYPE); + builder.startArray(Fields.ENTRIES); + for (Entry entry : computeEntries()) { + builder.startObject(); + builder.field(Fields.TIME, entry.time()); + builder.field(Fields.COUNT, entry.count()); + builder.field(Fields.TOTAL, entry.total()); + builder.field(Fields.MEAN, entry.mean()); + builder.endObject(); + } + builder.endArray(); + builder.endObject(); + } + + public static InternalCountAndTotalDateHistogramFacet readHistogramFacet(StreamInput in) throws IOException { + InternalCountAndTotalDateHistogramFacet facet = new InternalCountAndTotalDateHistogramFacet(); + facet.readFrom(in); + return facet; + } + + @Override public void readFrom(StreamInput in) throws IOException { + name = in.readUTF(); + comparatorType = ComparatorType.fromId(in.readByte()); + + int size = in.readVInt(); + if (size == 0) { + counts = EMPTY_LONG_LONG_MAP; + totals = EMPTY_LONG_DOUBLE_MAP; + } else { + counts = new TLongLongHashMap(size); + totals = new TLongDoubleHashMap(size); + for (int i = 0; i < size; i++) { + long key = in.readLong(); + counts.put(key, in.readVLong()); + totals.put(key, in.readDouble()); + } + } + } + + @Override public void writeTo(StreamOutput out) throws IOException { + out.writeUTF(name); + out.writeByte(comparatorType.id()); + // optimize the write, since we know we have the same buckets as keys + out.writeVInt(counts.size()); + for (TLongLongIterator it = counts.iterator(); it.hasNext();) { + it.advance(); + out.writeLong(it.key()); + out.writeVLong(it.value()); + out.writeDouble(totals.get(it.key())); + } + } + + static final TLongLongHashMap EMPTY_LONG_LONG_MAP = new TLongLongHashMap(); + static final TLongDoubleHashMap EMPTY_LONG_DOUBLE_MAP = new TLongDoubleHashMap(); +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/InternalCountDateHistogramFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/InternalCountDateHistogramFacet.java new file mode 100644 index 00000000000..8c02c20b8b7 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/InternalCountDateHistogramFacet.java @@ -0,0 +1,250 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.datehistogram; + +import org.elasticsearch.common.collect.ImmutableList; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.trove.TLongLongHashMap; +import org.elasticsearch.common.trove.TLongLongIterator; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentBuilderString; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.histogram.HistogramFacet; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.TreeSet; + +/** + * @author kimchy (shay.banon) + */ +public class InternalCountDateHistogramFacet extends InternalDateHistogramFacet { + + private static final String STREAM_TYPE = "cdHistogram"; + + public static void registerStreams() { + Streams.registerStream(STREAM, STREAM_TYPE); + } + + static Stream STREAM = new Stream() { + @Override public Facet readFacet(String type, StreamInput in) throws IOException { + return readHistogramFacet(in); + } + }; + + @Override public String streamType() { + return STREAM_TYPE; + } + + + /** + * A histogram entry representing a single entry within the result of a histogram facet. + */ + public class CountEntry implements Entry { + private final long time; + private final long count; + + public CountEntry(long time, long count) { + this.time = time; + this.count = count; + } + + public long time() { + return time; + } + + public long getTime() { + return time(); + } + + public long count() { + return count; + } + + public long getCount() { + return count(); + } + + public double total() { + return -1; + } + + public double getTotal() { + return total(); + } + + public double mean() { + return -1; + } + + public double getMean() { + return mean(); + } + } + + private String name; + + private ComparatorType comparatorType; + + TLongLongHashMap counts; + + Collection entries = null; + + private InternalCountDateHistogramFacet() { + } + + public InternalCountDateHistogramFacet(String name, ComparatorType comparatorType, TLongLongHashMap counts) { + this.name = name; + this.comparatorType = comparatorType; + this.counts = counts; + } + + @Override public String name() { + return this.name; + } + + @Override public String getName() { + return name(); + } + + @Override public String type() { + return TYPE; + } + + @Override public String getType() { + return type(); + } + + @Override public List entries() { + computeEntries(); + if (!(entries instanceof List)) { + entries = ImmutableList.copyOf(entries); + } + return (List) entries; + } + + @Override public List getEntries() { + return entries(); + } + + @Override public Iterator iterator() { + return (Iterator) computeEntries().iterator(); + } + + private Collection computeEntries() { + if (entries != null) { + return entries; + } + TreeSet set = new TreeSet(comparatorType.comparator()); + for (TLongLongIterator it = counts.iterator(); it.hasNext();) { + it.advance(); + set.add(new CountEntry(it.key(), it.value())); + } + entries = set; + return entries; + } + + @Override public Facet reduce(String name, List facets) { + if (facets.size() == 1) { + return facets.get(0); + } + TLongLongHashMap counts = null; + + InternalCountDateHistogramFacet firstHistoFacet = (InternalCountDateHistogramFacet) facets.get(0); + for (Facet facet : facets) { + InternalCountDateHistogramFacet histoFacet = (InternalCountDateHistogramFacet) facet; + if (!histoFacet.counts.isEmpty()) { + if (counts == null) { + counts = histoFacet.counts; + } else { + for (TLongLongIterator it = histoFacet.counts.iterator(); it.hasNext();) { + it.advance(); + counts.adjustOrPutValue(it.key(), it.value(), it.value()); + } + } + } + } + if (counts == null) { + counts = InternalCountDateHistogramFacet.EMPTY_LONG_LONG_MAP; + } + firstHistoFacet.counts = counts; + + return firstHistoFacet; + } + + static final class Fields { + static final XContentBuilderString _TYPE = new XContentBuilderString("_type"); + static final XContentBuilderString ENTRIES = new XContentBuilderString("entries"); + static final XContentBuilderString TIME = new XContentBuilderString("time"); + static final XContentBuilderString COUNT = new XContentBuilderString("count"); + } + + @Override public void toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(name); + builder.field(Fields._TYPE, HistogramFacet.TYPE); + builder.startArray(Fields.ENTRIES); + for (Entry entry : computeEntries()) { + builder.startObject(); + builder.field(Fields.TIME, entry.time()); + builder.field(Fields.COUNT, entry.count()); + builder.endObject(); + } + builder.endArray(); + builder.endObject(); + } + + public static InternalCountDateHistogramFacet readHistogramFacet(StreamInput in) throws IOException { + InternalCountDateHistogramFacet facet = new InternalCountDateHistogramFacet(); + facet.readFrom(in); + return facet; + } + + @Override public void readFrom(StreamInput in) throws IOException { + name = in.readUTF(); + comparatorType = ComparatorType.fromId(in.readByte()); + + int size = in.readVInt(); + if (size == 0) { + counts = EMPTY_LONG_LONG_MAP; + } else { + counts = new TLongLongHashMap(size); + for (int i = 0; i < size; i++) { + long key = in.readVLong(); + counts.put(key, in.readVLong()); + } + } + } + + @Override public void writeTo(StreamOutput out) throws IOException { + out.writeUTF(name); + out.writeByte(comparatorType.id()); + out.writeVInt(counts.size()); + for (TLongLongIterator it = counts.iterator(); it.hasNext();) { + it.advance(); + out.writeVLong(it.key()); + out.writeVLong(it.value()); + } + } + + static final TLongLongHashMap EMPTY_LONG_LONG_MAP = new TLongLongHashMap(); +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/InternalDateHistogramFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/InternalDateHistogramFacet.java new file mode 100644 index 00000000000..88f6fb3a585 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/InternalDateHistogramFacet.java @@ -0,0 +1,38 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.datehistogram; + +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.InternalFacet; + +import java.util.List; + +/** + * @author kimchy (shay.banon) + */ +public abstract class InternalDateHistogramFacet implements DateHistogramFacet, InternalFacet { + + public static void registerStreams() { + InternalCountDateHistogramFacet.registerStreams(); + InternalCountAndTotalDateHistogramFacet.registerStreams(); + } + + public abstract Facet reduce(String name, List facets); +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueDateHistogramFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueDateHistogramFacetCollector.java new file mode 100644 index 00000000000..ab43b4076ad --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueDateHistogramFacetCollector.java @@ -0,0 +1,121 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.datehistogram; + +import org.apache.lucene.index.IndexReader; +import org.elasticsearch.common.joda.time.MutableDateTime; +import org.elasticsearch.common.trove.TLongDoubleHashMap; +import org.elasticsearch.common.trove.TLongLongHashMap; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.NumericFieldData; +import org.elasticsearch.index.field.data.longs.LongFieldData; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.search.facet.AbstractFacetCollector; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.FacetPhaseExecutionException; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; + +/** + * A histogram facet collector that uses different fields for the key and the value. + * + * @author kimchy (shay.banon) + */ +public class ValueDateHistogramFacetCollector extends AbstractFacetCollector { + + private final String keyIndexFieldName; + private final String valueIndexFieldName; + + private MutableDateTime dateTime; + private final long interval; + + private final DateHistogramFacet.ComparatorType comparatorType; + + private final FieldDataCache fieldDataCache; + + private final FieldDataType keyFieldDataType; + private LongFieldData keyFieldData; + + private final FieldDataType valueFieldDataType; + private NumericFieldData valueFieldData; + + private final TLongLongHashMap counts = new TLongLongHashMap(); + private final TLongDoubleHashMap totals = new TLongDoubleHashMap(); + + public ValueDateHistogramFacetCollector(String facetName, String keyFieldName, String valueFieldName, MutableDateTime dateTime, long interval, DateHistogramFacet.ComparatorType comparatorType, SearchContext context) { + super(facetName); + this.dateTime = dateTime; + this.interval = interval; + this.comparatorType = comparatorType; + this.fieldDataCache = context.fieldDataCache(); + + MapperService.SmartNameFieldMappers smartMappers = context.mapperService().smartName(keyFieldName); + if (smartMappers == null || !smartMappers.hasMapper()) { + throw new FacetPhaseExecutionException(facetName, "No mapping found for field [" + keyFieldName + "]"); + } + + // add type filter if there is exact doc mapper associated with it + if (smartMappers.hasDocMapper()) { + setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); + } + + keyIndexFieldName = smartMappers.mapper().names().indexName(); + keyFieldDataType = smartMappers.mapper().fieldDataType(); + + FieldMapper mapper = context.mapperService().smartNameFieldMapper(valueFieldName); + if (mapper == null) { + throw new FacetPhaseExecutionException(facetName, "No mapping found for value_field [" + valueFieldName + "]"); + } + valueIndexFieldName = mapper.names().indexName(); + valueFieldDataType = mapper.fieldDataType(); + } + + @Override protected void doCollect(int doc) throws IOException { + // single key value, compute the bucket once + keyFieldData.date(doc, dateTime); + long time = dateTime.getMillis(); + if (interval != 1) { + time = CountDateHistogramFacetCollector.bucket(time, interval); + } + if (valueFieldData.multiValued()) { + for (double value : valueFieldData.doubleValues(doc)) { + counts.adjustOrPutValue(time, 1, 1); + totals.adjustOrPutValue(time, value, value); + } + } else { + // both key and value are not multi valued + double value = valueFieldData.doubleValue(doc); + counts.adjustOrPutValue(time, 1, 1); + totals.adjustOrPutValue(time, value, value); + } + } + + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + keyFieldData = (LongFieldData) fieldDataCache.cache(keyFieldDataType, reader, keyIndexFieldName); + valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, reader, valueIndexFieldName); + } + + @Override public Facet facet() { + return new InternalCountAndTotalDateHistogramFacet(facetName, comparatorType, counts, totals); + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueScriptDateHistogramFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueScriptDateHistogramFacetCollector.java new file mode 100644 index 00000000000..b983e5ab609 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueScriptDateHistogramFacetCollector.java @@ -0,0 +1,161 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.datehistogram; + +import org.apache.lucene.index.IndexReader; +import org.elasticsearch.common.joda.time.MutableDateTime; +import org.elasticsearch.common.trove.TLongDoubleHashMap; +import org.elasticsearch.common.trove.TLongLongHashMap; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.longs.LongFieldData; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.script.search.SearchScript; +import org.elasticsearch.search.facet.AbstractFacetCollector; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.FacetPhaseExecutionException; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.Map; + +/** + * A histogram facet collector that uses the same field as the key as well as the + * value. + * + * @author kimchy (shay.banon) + */ +public class ValueScriptDateHistogramFacetCollector extends AbstractFacetCollector { + + private final String indexFieldName; + + private final MutableDateTime dateTime; + + private final DateHistogramFacet.ComparatorType comparatorType; + + private final FieldDataCache fieldDataCache; + + private final FieldDataType fieldDataType; + + private LongFieldData fieldData; + + private final SearchScript valueScript; + + private final DateHistogramProc histoProc; + + public ValueScriptDateHistogramFacetCollector(String facetName, String fieldName, String scriptLang, String valueScript, Map params, MutableDateTime dateTime, long interval, DateHistogramFacet.ComparatorType comparatorType, SearchContext context) { + super(facetName); + this.dateTime = dateTime; + this.comparatorType = comparatorType; + this.fieldDataCache = context.fieldDataCache(); + + MapperService.SmartNameFieldMappers smartMappers = context.mapperService().smartName(fieldName); + if (smartMappers == null || !smartMappers.hasMapper()) { + throw new FacetPhaseExecutionException(facetName, "No mapping found for field [" + fieldName + "]"); + } + + // add type filter if there is exact doc mapper associated with it + if (smartMappers.hasDocMapper()) { + setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); + } + + this.valueScript = new SearchScript(context.lookup(), scriptLang, valueScript, params, context.scriptService()); + + FieldMapper mapper = smartMappers.mapper(); + + indexFieldName = mapper.names().indexName(); + fieldDataType = mapper.fieldDataType(); + + if (interval == 1) { + histoProc = new DateHistogramProc(this.valueScript); + } else { + histoProc = new IntervalDateHistogramProc(interval, this.valueScript); + } + } + + @Override protected void doCollect(int doc) throws IOException { + fieldData.forEachValueInDoc(doc, dateTime, histoProc); + } + + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + valueScript.setNextReader(reader); + } + + @Override public Facet facet() { + return new InternalCountAndTotalDateHistogramFacet(facetName, comparatorType, histoProc.counts(), histoProc.totals()); + } + + public static class DateHistogramProc implements LongFieldData.DateValueInDocProc { + + protected final SearchScript valueScript; + + protected final TLongLongHashMap counts = new TLongLongHashMap(); + + protected final TLongDoubleHashMap totals = new TLongDoubleHashMap(); + + public DateHistogramProc(SearchScript valueScript) { + this.valueScript = valueScript; + } + + @Override public void onValue(int docId, MutableDateTime dateTime) { + long time = dateTime.getMillis(); + counts.adjustOrPutValue(time, 1, 1); + double scriptValue = ((Number) valueScript.execute(docId)).doubleValue(); + totals.adjustOrPutValue(time, scriptValue, scriptValue); + } + + + public TLongLongHashMap counts() { + return counts; + } + + public TLongDoubleHashMap totals() { + return totals; + } + } + + public static class IntervalDateHistogramProc extends DateHistogramProc { + + private final long interval; + + public IntervalDateHistogramProc(long interval, SearchScript valueScript) { + super(valueScript); + this.interval = interval; + } + + + @Override public void onValue(int docId, MutableDateTime dateTime) { + long bucket = CountDateHistogramFacetCollector.bucket(dateTime.getMillis(), interval); + counts.adjustOrPutValue(bucket, 1, 1); + double scriptValue = ((Number) valueScript.execute(docId)).doubleValue(); + totals.adjustOrPutValue(bucket, scriptValue, scriptValue); + } + + public TLongLongHashMap counts() { + return counts; + } + + public TLongDoubleHashMap totals() { + return totals; + } + } +} \ No newline at end of file diff --git a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java index 97673652605..114feee018f 100644 --- a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java +++ b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java @@ -23,7 +23,10 @@ import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchType; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.client.Client; +import org.elasticsearch.common.joda.time.DateTimeZone; +import org.elasticsearch.common.joda.time.format.ISODateTimeFormat; import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.search.facet.datehistogram.DateHistogramFacet; import org.elasticsearch.search.facet.filter.FilterFacet; import org.elasticsearch.search.facet.histogram.HistogramFacet; import org.elasticsearch.search.facet.range.RangeFacet; @@ -905,4 +908,96 @@ public class SimpleFacetsTests extends AbstractNodesTests { assertThat(facet.entries().get(2).count(), equalTo(1l)); assertThat(facet.entries().get(2).fromAsString(), equalTo("1970-01-01T00:00:26")); } + + @Test public void testDateHistoFacets() throws Exception { + try { + client.admin().indices().prepareDelete("test").execute().actionGet(); + } catch (Exception e) { + // ignore + } + client.admin().indices().prepareCreate("test").execute().actionGet(); + client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); + + client.prepareIndex("test", "type1").setSource(jsonBuilder().startObject() + .field("date", "2009-03-05T01:01:01") + .field("num", 1) + .endObject()).execute().actionGet(); + client.admin().indices().prepareFlush().setRefresh(true).execute().actionGet(); + + client.prepareIndex("test", "type1").setSource(jsonBuilder().startObject() + .field("date", "2009-03-05T04:01:01") + .field("num", 2) + .endObject()).execute().actionGet(); + client.admin().indices().prepareRefresh().execute().actionGet(); + + client.prepareIndex("test", "type1").setSource(jsonBuilder().startObject() + .field("date", "2009-03-06T01:01:01") + .field("num", 3) + .endObject()).execute().actionGet(); + client.admin().indices().prepareRefresh().execute().actionGet(); + + + SearchResponse searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addFacet(dateHistogramFacet("stats1").field("date").interval("day")) + .addFacet(dateHistogramFacet("stats2").field("date").interval("day").zone("-02:00")) + .addFacet(dateHistogramFacet("stats3").field("date").valueField("num").interval("day").zone("-02:00")) + .addFacet(dateHistogramFacet("stats4").field("date").valueScript("doc['num'].value * 2").interval("day").zone("-02:00")) + .execute().actionGet(); + + if (searchResponse.failedShards() > 0) { + logger.warn("Failed shards:"); + for (ShardSearchFailure shardSearchFailure : searchResponse.shardFailures()) { + logger.warn("-> {}", shardSearchFailure); + } + } + assertThat(searchResponse.failedShards(), equalTo(0)); + + DateHistogramFacet facet = searchResponse.facets().facet("stats1"); + assertThat(facet.name(), equalTo("stats1")); + assertThat(facet.entries().size(), equalTo(2)); + assertThat(facet.entries().get(0).time(), equalTo(utcTimeInMillis("2009-03-05"))); + assertThat(facet.entries().get(0).count(), equalTo(2l)); + assertThat(facet.entries().get(1).time(), equalTo(utcTimeInMillis("2009-03-06"))); + assertThat(facet.entries().get(1).count(), equalTo(1l)); + + // time zone causes the dates to shift by 2 + facet = searchResponse.facets().facet("stats2"); + assertThat(facet.name(), equalTo("stats2")); + assertThat(facet.entries().size(), equalTo(2)); + assertThat(facet.entries().get(0).time(), equalTo(timeInMillis("2009-03-04", DateTimeZone.forOffsetHours(-2)))); + assertThat(facet.entries().get(0).count(), equalTo(1l)); + assertThat(facet.entries().get(1).time(), equalTo(timeInMillis("2009-03-05", DateTimeZone.forOffsetHours(-2)))); + assertThat(facet.entries().get(1).count(), equalTo(2l)); + + // time zone causes the dates to shift by 2 + facet = searchResponse.facets().facet("stats3"); + assertThat(facet.name(), equalTo("stats3")); + assertThat(facet.entries().size(), equalTo(2)); + assertThat(facet.entries().get(0).time(), equalTo(timeInMillis("2009-03-04", DateTimeZone.forOffsetHours(-2)))); + assertThat(facet.entries().get(0).count(), equalTo(1l)); + assertThat(facet.entries().get(0).total(), equalTo(1d)); + assertThat(facet.entries().get(1).time(), equalTo(timeInMillis("2009-03-05", DateTimeZone.forOffsetHours(-2)))); + assertThat(facet.entries().get(1).count(), equalTo(2l)); + assertThat(facet.entries().get(1).total(), equalTo(5d)); + + // time zone causes the dates to shift by 2 + facet = searchResponse.facets().facet("stats4"); + assertThat(facet.name(), equalTo("stats4")); + assertThat(facet.entries().size(), equalTo(2)); + assertThat(facet.entries().get(0).time(), equalTo(timeInMillis("2009-03-04", DateTimeZone.forOffsetHours(-2)))); + assertThat(facet.entries().get(0).count(), equalTo(1l)); + assertThat(facet.entries().get(0).total(), equalTo(2d)); + assertThat(facet.entries().get(1).time(), equalTo(timeInMillis("2009-03-05", DateTimeZone.forOffsetHours(-2)))); + assertThat(facet.entries().get(1).count(), equalTo(2l)); + assertThat(facet.entries().get(1).total(), equalTo(10d)); + } + + private long utcTimeInMillis(String time) { + return timeInMillis(time, DateTimeZone.UTC); + } + + private long timeInMillis(String time, DateTimeZone zone) { + return ISODateTimeFormat.dateOptionalTimeParser().withZone(zone).parseMillis(time); + } }