Added normalisation to Derivative Reducer

This changes adds the ability to specify the units for the x-axis for derivative values and calculate the derivative based on those units rather than the original histograms x-axis units
This commit is contained in:
Colin Goodheart-Smithe 2015-04-13 11:44:29 +01:00
parent 7d38989cdb
commit 48a94a41df
5 changed files with 173 additions and 12 deletions

View File

@ -20,16 +20,17 @@
package org.elasticsearch.search.aggregations.reducers.derivative;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
import org.elasticsearch.search.aggregations.reducers.BucketHelpers.GapPolicy;
import org.elasticsearch.search.aggregations.reducers.ReducerBuilder;
import java.io.IOException;
import static org.elasticsearch.search.aggregations.reducers.BucketHelpers.GapPolicy;
public class DerivativeBuilder extends ReducerBuilder<DerivativeBuilder> {
private String format;
private GapPolicy gapPolicy;
private String units;
public DerivativeBuilder(String name) {
super(name, DerivativeReducer.TYPE.name());
@ -45,6 +46,16 @@ public class DerivativeBuilder extends ReducerBuilder<DerivativeBuilder> {
return this;
}
public DerivativeBuilder units(String units) {
this.units = units;
return this;
}
public DerivativeBuilder units(DateHistogramInterval units) {
this.units = units.toString();
return this;
}
@Override
protected XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException {
if (format != null) {
@ -53,6 +64,9 @@ public class DerivativeBuilder extends ReducerBuilder<DerivativeBuilder> {
if (gapPolicy != null) {
builder.field(DerivativeParser.GAP_POLICY.getPreferredName(), gapPolicy.getName());
}
if (units != null) {
builder.field(DerivativeParser.UNITS.getPreferredName(), units);
}
return builder;
}

View File

@ -19,9 +19,15 @@
package org.elasticsearch.search.aggregations.reducers.derivative;
import com.google.common.collect.ImmutableMap;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.common.rounding.DateTimeUnit;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.SearchParseException;
import org.elasticsearch.search.aggregations.reducers.BucketHelpers.GapPolicy;
import org.elasticsearch.search.aggregations.reducers.Reducer;
import org.elasticsearch.search.aggregations.reducers.ReducerFactory;
import org.elasticsearch.search.aggregations.support.format.ValueFormat;
@ -32,12 +38,23 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import static org.elasticsearch.search.aggregations.reducers.BucketHelpers.GapPolicy;
public class DerivativeParser implements Reducer.Parser {
public static final ParseField FORMAT = new ParseField("format");
public static final ParseField GAP_POLICY = new ParseField("gap_policy");
public static final ParseField UNITS = new ParseField("units");
private final ImmutableMap<String, DateTimeUnit> dateFieldUnits;
public DerivativeParser() {
dateFieldUnits = MapBuilder.<String, DateTimeUnit> newMapBuilder().put("year", DateTimeUnit.YEAR_OF_CENTURY)
.put("1y", DateTimeUnit.YEAR_OF_CENTURY).put("quarter", DateTimeUnit.QUARTER).put("1q", DateTimeUnit.QUARTER)
.put("month", DateTimeUnit.MONTH_OF_YEAR).put("1M", DateTimeUnit.MONTH_OF_YEAR).put("week", DateTimeUnit.WEEK_OF_WEEKYEAR)
.put("1w", DateTimeUnit.WEEK_OF_WEEKYEAR).put("day", DateTimeUnit.DAY_OF_MONTH).put("1d", DateTimeUnit.DAY_OF_MONTH)
.put("hour", DateTimeUnit.HOUR_OF_DAY).put("1h", DateTimeUnit.HOUR_OF_DAY).put("minute", DateTimeUnit.MINUTES_OF_HOUR)
.put("1m", DateTimeUnit.MINUTES_OF_HOUR).put("second", DateTimeUnit.SECOND_OF_MINUTE)
.put("1s", DateTimeUnit.SECOND_OF_MINUTE).immutableMap();
}
@Override
public String type() {
@ -50,6 +67,7 @@ public class DerivativeParser implements Reducer.Parser {
String currentFieldName = null;
String[] bucketsPaths = null;
String format = null;
String units = null;
GapPolicy gapPolicy = GapPolicy.IGNORE;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
@ -62,6 +80,8 @@ public class DerivativeParser implements Reducer.Parser {
bucketsPaths = new String[] { parser.text() };
} else if (GAP_POLICY.match(currentFieldName)) {
gapPolicy = GapPolicy.parse(context, parser.text());
} else if (UNITS.match(currentFieldName)) {
units = parser.text();
} else {
throw new SearchParseException(context, "Unknown key for a " + token + " in [" + reducerName + "]: ["
+ currentFieldName + "].");
@ -93,7 +113,20 @@ public class DerivativeParser implements Reducer.Parser {
formatter = ValueFormat.Patternable.Number.format(format).formatter();
}
return new DerivativeReducer.Factory(reducerName, bucketsPaths, formatter, gapPolicy);
long xAxisUnits = -1;
if (units != null) {
DateTimeUnit dateTimeUnit = dateFieldUnits.get(units);
if (dateTimeUnit != null) {
xAxisUnits = dateTimeUnit.field().getDurationField().getUnitMillis();
} else {
TimeValue timeValue = TimeValue.parseTimeValue(units, null);
if (timeValue != null) {
xAxisUnits = timeValue.getMillis();
}
}
}
return new DerivativeReducer.Factory(reducerName, bucketsPaths, formatter, gapPolicy, xAxisUnits);
}
}

View File

@ -25,6 +25,7 @@ import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregation.ReduceContext;
@ -39,6 +40,7 @@ import org.elasticsearch.search.aggregations.reducers.ReducerFactory;
import org.elasticsearch.search.aggregations.reducers.ReducerStreams;
import org.elasticsearch.search.aggregations.support.format.ValueFormatter;
import org.elasticsearch.search.aggregations.support.format.ValueFormatterStreams;
import org.joda.time.DateTime;
import java.io.IOException;
import java.util.ArrayList;
@ -66,15 +68,17 @@ public class DerivativeReducer extends Reducer {
private ValueFormatter formatter;
private GapPolicy gapPolicy;
private long xAxisUnits;
public DerivativeReducer() {
}
public DerivativeReducer(String name, String[] bucketsPaths, @Nullable ValueFormatter formatter, GapPolicy gapPolicy,
public DerivativeReducer(String name, String[] bucketsPaths, @Nullable ValueFormatter formatter, GapPolicy gapPolicy, long xAxisUnits,
Map<String, Object> metadata) {
super(name, bucketsPaths, metadata);
this.formatter = formatter;
this.gapPolicy = gapPolicy;
this.xAxisUnits = xAxisUnits;
}
@Override
@ -89,51 +93,74 @@ public class DerivativeReducer extends Reducer {
InternalHistogram.Factory<? extends InternalHistogram.Bucket> factory = histo.getFactory();
List newBuckets = new ArrayList<>();
Long lastBucketKey = null;
Double lastBucketValue = null;
for (InternalHistogram.Bucket bucket : buckets) {
Long thisBucketKey = resolveBucketKeyAsLong(bucket);
Double thisBucketValue = resolveBucketValue(histo, bucket, bucketsPaths()[0], gapPolicy);
if (lastBucketValue != null) {
double diff = thisBucketValue - lastBucketValue;
List<InternalAggregation> aggs = new ArrayList<>(Lists.transform(bucket.getAggregations().asList(), AGGREGATION_TRANFORM_FUNCTION));
aggs.add(new InternalSimpleValue(name(), diff, formatter, new ArrayList<Reducer>(), metaData()));
double gradient = thisBucketValue - lastBucketValue;
if (xAxisUnits != -1) {
double xDiff = (thisBucketKey - lastBucketKey) / xAxisUnits;
gradient = gradient / xDiff;
}
List<InternalAggregation> aggs = new ArrayList<>(Lists.transform(bucket.getAggregations().asList(),
AGGREGATION_TRANFORM_FUNCTION));
aggs.add(new InternalSimpleValue(name(), gradient, formatter, new ArrayList<Reducer>(), metaData()));
InternalHistogram.Bucket newBucket = factory.createBucket(bucket.getKey(), bucket.getDocCount(), new InternalAggregations(
aggs), bucket.getKeyed(), bucket.getFormatter());
newBuckets.add(newBucket);
} else {
newBuckets.add(bucket);
}
lastBucketKey = thisBucketKey;
lastBucketValue = thisBucketValue;
}
return factory.create(newBuckets, histo);
}
private Long resolveBucketKeyAsLong(InternalHistogram.Bucket bucket) {
Object key = bucket.getKey();
if (key instanceof DateTime) {
return ((DateTime) key).getMillis();
} else if (key instanceof Number) {
return ((Number) key).longValue();
} else {
throw new AggregationExecutionException("Bucket keys must be either a Number or a DateTime for aggregation " + name()
+ ". Found bucket with key " + key);
}
}
@Override
public void doReadFrom(StreamInput in) throws IOException {
formatter = ValueFormatterStreams.readOptional(in);
gapPolicy = GapPolicy.readFrom(in);
xAxisUnits = in.readLong();
}
@Override
public void doWriteTo(StreamOutput out) throws IOException {
ValueFormatterStreams.writeOptional(formatter, out);
gapPolicy.writeTo(out);
out.writeLong(xAxisUnits);
}
public static class Factory extends ReducerFactory {
private final ValueFormatter formatter;
private GapPolicy gapPolicy;
private long xAxisUnits;
public Factory(String name, String[] bucketsPaths, @Nullable ValueFormatter formatter, GapPolicy gapPolicy) {
public Factory(String name, String[] bucketsPaths, @Nullable ValueFormatter formatter, GapPolicy gapPolicy, long xAxisUnits) {
super(name, TYPE.name(), bucketsPaths);
this.formatter = formatter;
this.gapPolicy = gapPolicy;
this.xAxisUnits = xAxisUnits;
}
@Override
protected Reducer createInternal(Map<String, Object> metaData) throws IOException {
return new DerivativeReducer(name, bucketsPaths, formatter, gapPolicy, metaData);
return new DerivativeReducer(name, bucketsPaths, formatter, gapPolicy, xAxisUnits, metaData);
}
@Override

View File

@ -45,6 +45,7 @@ import static org.elasticsearch.search.aggregations.AggregationBuilders.dateHist
import static org.elasticsearch.search.aggregations.AggregationBuilders.sum;
import static org.elasticsearch.search.aggregations.reducers.ReducerBuilders.derivative;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.core.IsNull.notNullValue;
@ -147,6 +148,50 @@ public class DateDerivativeTests extends ElasticsearchIntegrationTest {
assertThat(docCountDeriv.value(), equalTo(1d));
}
@Test
public void singleValuedField_normalised() throws Exception {
SearchResponse response = client()
.prepareSearch("idx")
.addAggregation(
dateHistogram("histo").field("date").interval(DateHistogramInterval.MONTH).minDocCount(0)
.subAggregation(derivative("deriv").setBucketsPaths("_count").units(DateHistogramInterval.DAY))).execute()
.actionGet();
assertSearchResponse(response);
InternalHistogram deriv = response.getAggregations().get("histo");
assertThat(deriv, notNullValue());
assertThat(deriv.getName(), equalTo("histo"));
List<? extends Bucket> buckets = deriv.getBuckets();
assertThat(buckets.size(), equalTo(3));
DateTime key = new DateTime(2012, 1, 1, 0, 0, DateTimeZone.UTC);
Histogram.Bucket bucket = buckets.get(0);
assertThat(bucket, notNullValue());
assertThat((DateTime) bucket.getKey(), equalTo(key));
assertThat(bucket.getDocCount(), equalTo(1l));
SimpleValue docCountDeriv = bucket.getAggregations().get("deriv");
assertThat(docCountDeriv, nullValue());
key = new DateTime(2012, 2, 1, 0, 0, DateTimeZone.UTC);
bucket = buckets.get(1);
assertThat(bucket, notNullValue());
assertThat((DateTime) bucket.getKey(), equalTo(key));
assertThat(bucket.getDocCount(), equalTo(2l));
docCountDeriv = bucket.getAggregations().get("deriv");
assertThat(docCountDeriv, notNullValue());
assertThat(docCountDeriv.value(), closeTo(1d / 31d, 0.00001));
key = new DateTime(2012, 3, 1, 0, 0, DateTimeZone.UTC);
bucket = buckets.get(2);
assertThat(bucket, notNullValue());
assertThat((DateTime) bucket.getKey(), equalTo(key));
assertThat(bucket.getDocCount(), equalTo(3l));
docCountDeriv = bucket.getAggregations().get("deriv");
assertThat(docCountDeriv, notNullValue());
assertThat(docCountDeriv.value(), closeTo(1d / 29d, 0.00001));
}
@Test
public void singleValuedField_WithSubAggregation() throws Exception {
SearchResponse response = client()

View File

@ -43,6 +43,7 @@ import static org.elasticsearch.search.aggregations.AggregationBuilders.sum;
import static org.elasticsearch.search.aggregations.reducers.ReducerBuilders.derivative;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.core.IsNull.notNullValue;
import static org.hamcrest.core.IsNull.nullValue;
@ -196,6 +197,47 @@ public class DerivativeTests extends ElasticsearchIntegrationTest {
}
}
/**
* test first and second derivative on the sing
*/
@Test
public void singleValuedField_normalised() {
SearchResponse response = client()
.prepareSearch("idx")
.addAggregation(
histogram("histo").field(SINGLE_VALUED_FIELD_NAME).interval(interval).minDocCount(0)
.subAggregation(derivative("deriv").setBucketsPaths("_count").units("1"))
.subAggregation(derivative("2nd_deriv").setBucketsPaths("deriv"))).execute().actionGet();
assertSearchResponse(response);
InternalHistogram<Bucket> deriv = response.getAggregations().get("histo");
assertThat(deriv, notNullValue());
assertThat(deriv.getName(), equalTo("histo"));
List<? extends Bucket> buckets = deriv.getBuckets();
assertThat(buckets.size(), equalTo(numValueBuckets));
for (int i = 0; i < numValueBuckets; ++i) {
Histogram.Bucket bucket = buckets.get(i);
checkBucketKeyAndDocCount("Bucket " + i, bucket, i * interval, valueCounts[i]);
SimpleValue docCountDeriv = bucket.getAggregations().get("deriv");
if (i > 0) {
assertThat(docCountDeriv, notNullValue());
assertThat(docCountDeriv.value(), closeTo((double) (firstDerivValueCounts[i - 1]) / 5, 0.00001));
} else {
assertThat(docCountDeriv, nullValue());
}
SimpleValue docCount2ndDeriv = bucket.getAggregations().get("2nd_deriv");
if (i > 1) {
assertThat(docCount2ndDeriv, notNullValue());
assertThat(docCount2ndDeriv.value(), closeTo((double) (secondDerivValueCounts[i - 2]) / 5, 0.00001));
} else {
assertThat(docCount2ndDeriv, nullValue());
}
}
}
@Test
public void singleValuedField_WithSubAggregation() throws Exception {
SearchResponse response = client()