Aggregations: Add 'offset' option to histogram aggregation
The histogram aggregation now supports an 'offset' option to move bucket boundaries: in a histogram with buckets of size X, an offset value of Y shifts the boundaries from 0, X, 2X, 3X, ... to Y, X+Y, 2X+Y, 3X+Y, ... The previous 'pre_offset' and 'post_offset' options are removed in favour of the simplified 'offset' option.

Closes #9417
Closes #9505
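As a quick illustration of the new option, this is roughly how the builder API touched in this commit would be used (the index name, field name and client setup are made up; only `histogram(...).interval(...).offset(...)` comes from the changed code):

[source,java]
--------------------------------------------------
import static org.elasticsearch.search.aggregations.AggregationBuilders.histogram;

// buckets of size 5 shifted by 2: keys 2, 7, 12, ... instead of 0, 5, 10, ...
SearchResponse response = client.prepareSearch("sales")
        .addAggregation(histogram("prices").field("price").interval(5).offset(2))
        .execute().actionGet();
--------------------------------------------------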
This commit is contained in:
parent 6c27f1242a
commit 44193e7ba5
@@ -6,7 +6,7 @@ It dynamically builds fixed size (a.k.a. interval) buckets over the values. For
 that holds a price (numeric), we can configure this aggregation to dynamically build buckets with interval `5`
 (in case of price it may represent $5). When the aggregation executes, the price field of every document will be
 evaluated and will be rounded down to its closest bucket - for example, if the price is `32` and the bucket size is `5`
-then the rounding will yield `30` and thus the document will "fall" into the bucket that is associated withe the key `30`.
+then the rounding will yield `30` and thus the document will "fall" into the bucket that is associated with the key `30`.
 To make this more formal, here is the rounding function that is used:
 
 [source,java]
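As a rough sketch of the rounding described in this part of the documentation (an illustration only, not the actual `Rounding` implementation), a price of `32` with interval `5` rounds down to the bucket key `30`:

[source,java]
--------------------------------------------------
// illustrative only: round a value down to its bucket key
long interval = 5;
long price = 32;
long bucketKey = Math.floorDiv(price, interval) * interval; // 6 * 5 = 30
--------------------------------------------------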
@@ -326,6 +326,15 @@ Here is an example of what the response could look like:
 }
 --------------------------------------------------
 
+==== Offset
+
+By default the bucket keys start with 0 and then continue in evenly spaced steps of `interval`, e.g. if the interval is 10 the first buckets
+(assuming there is data inside them) will be [0-9], [10-19], [20-29]. The bucket boundaries can be shifted by using the `offset` option.
+
+This can be best illustrated with an example. If there are 10 documents with values ranging from 5 to 14, using interval `10` will result in
+two buckets with 5 documents each. If an additional offset `5` is used, there will be only one single bucket [5-14] containing all the 10
+documents.
+
 ==== Response Format
 
 By default, the buckets are returned as an ordered array. It is also possible to request the response as a hash
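A minimal sketch of the example in the new section above (the floor-based formula is an assumption used for illustration, not necessarily the exact implementation): with interval 10, the values 5..14 split over the keys 0 and 10, but with offset 5 they all map to the single key 5:

[source,java]
--------------------------------------------------
// illustrative only
long interval = 10;
long offset = 5;
for (long value = 5; value <= 14; value++) {
    long plainKey  = Math.floorDiv(value, interval) * interval;                   // 0 for 5..9, 10 for 10..14
    long offsetKey = Math.floorDiv(value - offset, interval) * interval + offset; // 5 for every value in 5..14
}
--------------------------------------------------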
@@ -35,8 +35,7 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
     private Long minDocCount;
     private Long extendedBoundsMin;
     private Long extendedBoundsMax;
-    private Long preOffset;
-    private Long postOffset;
+    private Long offset;
 
     /**
      * Constructs a new histogram aggregation builder.
@@ -92,18 +91,10 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
     }
 
     /**
-     * Set the offset to apply prior to computing buckets.
+     * Set the offset to apply to shift bucket boundaries.
      */
-    public HistogramBuilder preOffset(long preOffset) {
-        this.preOffset = preOffset;
-        return this;
-    }
-
-    /**
-     * Set the offset to apply after having computed buckets.
-     */
-    public HistogramBuilder postOffset(long postOffset) {
-        this.postOffset = postOffset;
+    public HistogramBuilder offset(long offset) {
+        this.offset = offset;
         return this;
     }
 
@@ -119,15 +110,10 @@ public class HistogramBuilder extends ValuesSourceAggregationBuilder<HistogramBu
             order.toXContent(builder, params);
         }
 
-        if (preOffset != null) {
-            builder.field("pre_offset", preOffset);
+        if (offset != null) {
+            builder.field("offset", offset);
         }
 
-        if (postOffset != null) {
-            builder.field("post_offset", postOffset);
-        }
-
-
         if (minDocCount != null) {
            builder.field("min_doc_count", minDocCount);
         }
@@ -56,8 +56,7 @@ public class HistogramParser implements Aggregator.Parser {
         InternalOrder order = (InternalOrder) InternalOrder.KEY_ASC;
         long interval = -1;
         ExtendedBounds extendedBounds = null;
-        long preOffset = 0;
-        long postOffset = 0;
+        long offset = 0;
 
         XContentParser.Token token;
         String currentFieldName = null;
@@ -73,10 +72,8 @@ public class HistogramParser implements Aggregator.Parser {
                     minDocCount = parser.longValue();
                 } else if ("keyed".equals(currentFieldName)) {
                     keyed = parser.booleanValue();
-                } else if ("pre_offset".equals(currentFieldName) || "preOffset".equals(currentFieldName)) {
-                    preOffset = parser.longValue();
-                } else if ("post_offset".equals(currentFieldName) || "postOffset".equals(currentFieldName)) {
-                    postOffset = parser.longValue();
+                } else if ("offset".equals(currentFieldName)) {
+                    offset = parser.longValue();
                 } else {
                     throw new SearchParseException(context, "Unknown key for a " + token + " in aggregation [" + aggregationName + "]: [" + currentFieldName + "].");
                 }
@@ -121,10 +118,10 @@ public class HistogramParser implements Aggregator.Parser {
         if (interval < 0) {
             throw new SearchParseException(context, "Missing required field [interval] for histogram aggregation [" + aggregationName + "]");
         }
 
         Rounding rounding = new Rounding.Interval(interval);
-        if (preOffset != 0 || postOffset != 0) {
-            rounding = new Rounding.PrePostRounding((Rounding.Interval) rounding, preOffset, postOffset);
+        if (offset != 0) {
+            rounding = new Rounding.PrePostRounding((Rounding.Interval) rounding, -offset, offset);
         }
 
         if (extendedBounds != null) {
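The mapping to Rounding.PrePostRounding with `-offset`/`+offset` is worth spelling out (my reading of the change, not stated in the diff): the value is shifted down before the interval rounding and shifted back up afterwards, which places the bucket keys at offset, offset ± interval, and so on. A sketch, assuming the interval rounding is a plain floor:

[source,java]
--------------------------------------------------
// illustrative only, not the Rounding implementation
long interval = 10, offset = 5, value = 7;
long preShifted = value + (-offset);                           // 2
long rounded = Math.floorDiv(preShifted, interval) * interval; // 0
long key = rounded + offset;                                   // 5 -> value 7 falls into bucket [5, 15)
--------------------------------------------------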
@@ -20,25 +20,82 @@
package org.elasticsearch.common.rounding;

import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;

import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.lessThanOrEqualTo;

public class RoundingTests extends ElasticsearchTestCase {

    /**
     * simple testcase to illustrate how Rounding.Interval works on readable input
     */
    @Test
    public void testInterval() {
        int interval = 10;
        Rounding.Interval rounding = new Rounding.Interval(interval);
        int value = 24;
        final long key = rounding.roundKey(24);
        final long r = rounding.round(24);
        String message = "round(" + value + ", interval=" + interval + ") = " + r;
        assertEquals(value / interval, key);
        assertEquals(value / interval * interval, r);
        assertEquals(message, 0, r % interval);
    }

    @Test
    public void testIntervalRandom() {
        final long interval = randomIntBetween(1, 100);
        Rounding.Interval rounding = new Rounding.Interval(interval);
        for (int i = 0; i < 1000; ++i) {
            long l = Math.max(randomLong(), Long.MIN_VALUE + interval);
            final long key = rounding.roundKey(l);
            final long r = rounding.round(l);
            String message = "round(" + l + ", interval=" + interval + ") = " + r;
            assertEquals(message, 0, r % interval);
            assertThat(message, r, lessThanOrEqualTo(l));
            assertThat(message, r + interval, greaterThan(l));
            assertEquals(message, r, key * interval);
        }
    }

    /**
     * Simple testcase to illustrate how Rounding.PrePostRounding works on readable input.
     * preOffset shifts the input value before rounding (so here 24 -> 31)
     * postOffset shifts the rounded value after rounding (here 30 -> 35)
     */
    @Test
    public void testPrePostRounding() {
        int interval = 10;
        int value = 24;
        int preOffset = 7;
        int postOffset = 5;
        Rounding.PrePostRounding rounding = new Rounding.PrePostRounding(new Rounding.Interval(interval), preOffset, postOffset);
        final long key = rounding.roundKey(24);
        final long roundedValue = rounding.round(24);
        String message = "round(" + value + ", interval=" + interval + ") = " + roundedValue;
        assertEquals(3, key);
        assertEquals(35, roundedValue);
        assertEquals(message, postOffset, roundedValue % interval);
    }

    @Test
    public void testPrePostRoundingRandom() {
        final long interval = randomIntBetween(1, 100);
        Rounding.Interval internalRounding = new Rounding.Interval(interval);
        final long preRounding = randomIntBetween(-100, 100);
        final long postRounding = randomIntBetween(-100, 100);
        Rounding.PrePostRounding prePost = new Rounding.PrePostRounding(new Rounding.Interval(interval), preRounding, postRounding);
        long safetyMargin = Math.abs(interval) + Math.abs(preRounding) + Math.abs(postRounding); // to prevent range overflow / underflow
        for (int i = 0; i < 1000; ++i) {
            long l = Math.max(randomLong() - safetyMargin, Long.MIN_VALUE + safetyMargin);
            final long key = prePost.roundKey(l);
            final long r = prePost.round(l);
            String message = "round(" + l + ", interval=" + interval + ") = " + r;
            assertEquals(message, internalRounding.round(l + preRounding), r - postRounding);
            assertThat(message, r - postRounding, lessThanOrEqualTo(l + preRounding));
            assertThat(message, r + interval - postRounding, greaterThan(l + preRounding));
            assertEquals(message, r, key * interval + postRounding);
        }
    }
}
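The fixed expectations in testPrePostRounding can be checked by hand, assuming Rounding.Interval rounds down by floor division (which is what the assertions in testInterval imply):

[source,java]
--------------------------------------------------
// hand-check of testPrePostRounding: interval 10, preOffset 7, postOffset 5
long value = 24;
long preShifted = value + 7;                         // 31
long rounded = Math.floorDiv(preShifted, 10L) * 10;  // 30
long key = rounded / 10;                             // 3  -> matches assertEquals(3, key)
long result = rounded + 5;                           // 35 -> matches assertEquals(35, roundedValue)
--------------------------------------------------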
@@ -125,7 +125,6 @@ public class HistogramTests extends ElasticsearchIntegrationTest {
 
         assertSearchResponse(response);
 
-
         Histogram histo = response.getAggregations().get("histo");
         assertThat(histo, notNullValue());
         assertThat(histo.getName(), equalTo("histo"));
@@ -140,51 +139,71 @@ public class HistogramTests extends ElasticsearchIntegrationTest {
        }
    }

    @Test
    public void singleValuedField_withPreOffset() throws Exception {
        long preOffsetMultiplier = randomIntBetween(2, 10);
        SearchResponse response = client().prepareSearch("idx")
                .addAggregation(histogram("histo").field(SINGLE_VALUED_FIELD_NAME).interval(interval).preOffset(preOffsetMultiplier * interval))
    public void singleValuedField_withOffset() throws Exception {
        int interval1 = 10;
        int offset = 5;
        SearchResponse response = client()
                .prepareSearch("idx")
                .addAggregation(histogram("histo").field(SINGLE_VALUED_FIELD_NAME).interval(interval1).offset(offset))
                .execute().actionGet();

        assertSearchResponse(response);

        // from setup we have between 6 and 20 documents, one per value from 1 to numDocs in the test field
        int expectedNumberOfBuckets = (offset >= (numDocs % interval + 1)) ? numValueBuckets : numValueBuckets + 1;
        Histogram histo = response.getAggregations().get("histo");
        assertThat(histo, notNullValue());
        assertThat(histo.getName(), equalTo("histo"));
        List<? extends Bucket> buckets = histo.getBuckets();
        assertThat(buckets.size(), equalTo(numValueBuckets));
        assertThat(histo.getBuckets().size(), equalTo(expectedNumberOfBuckets));

        for (int i = 0; i < numValueBuckets; ++i) {
            Histogram.Bucket bucket = buckets.get(i);
            assertThat(bucket, notNullValue());
            assertThat(((Number) bucket.getKey()).longValue(), equalTo((long) (i + preOffsetMultiplier) * interval));
            assertThat(bucket.getDocCount(), equalTo(valueCounts[i]));
        }
        // first bucket should start at -5, contain 4 documents
        Histogram.Bucket bucket = histo.getBuckets().get(0);
        assertThat(bucket, notNullValue());
        assertThat(((Number) bucket.getKey()).longValue(), equalTo(-5L));
        assertThat(bucket.getDocCount(), equalTo(4L));

        // last bucket should have (numDocs % interval + 1) docs
        bucket = histo.getBuckets().get(histo.getBuckets().size() - 1);
        assertThat(bucket, notNullValue());
        assertThat(((Number) bucket.getKey()).longValue(), equalTo(numDocs % interval1 + 5L));
        assertThat(bucket.getDocCount(), equalTo((numDocs % interval) + 1L));
    }

    /**
     * Shift buckets by random offset between [2..interval]. From setup we have one doc per value from 1..numDocs.
     * Special care needs to be taken for expectations on counts in first and last bucket.
     */
    @Test
    public void singleValuedField_withPostOffset() throws Exception {
        long postOffsetMultiplier = randomIntBetween(2, 10);
        SearchResponse response = client().prepareSearch("idx")
                .addAggregation(histogram("histo").field(SINGLE_VALUED_FIELD_NAME).interval(interval).postOffset(postOffsetMultiplier * interval))
    public void singleValuedField_withRandomOffset() throws Exception {
        int offset = randomIntBetween(2, interval);
        SearchResponse response = client()
                .prepareSearch("idx")
                .addAggregation(histogram("histo").field(SINGLE_VALUED_FIELD_NAME).interval(interval).offset(offset))
                .execute().actionGet();

        assertSearchResponse(response);

        // shifting by offset>2 creates new extra bucket [0,offset-1]
        // if offset is >= number of values in original last bucket, that effect is canceled
        int expectedNumberOfBuckets = (offset >= (numDocs % interval + 1)) ? numValueBuckets : numValueBuckets + 1;

        Histogram histo = response.getAggregations().get("histo");
        assertThat(histo, notNullValue());
        assertThat(histo.getName(), equalTo("histo"));
        List<? extends Bucket> buckets = histo.getBuckets();
        assertThat(buckets.size(), equalTo(numValueBuckets));
        assertThat(histo.getBuckets().size(), equalTo(expectedNumberOfBuckets));

        for (int i = 0; i < numValueBuckets; ++i) {
            Histogram.Bucket bucket = buckets.get(i);
        int docsCounted = 0;
        for (int i = 0; i < expectedNumberOfBuckets; ++i) {
            Histogram.Bucket bucket = histo.getBuckets().get(i);
            assertThat(bucket, notNullValue());
            assertThat(((Number) bucket.getKey()).longValue(), equalTo((long) (i + postOffsetMultiplier) * interval));
            assertThat(bucket.getDocCount(), equalTo(valueCounts[i]));
            assertThat(((Number) bucket.getKey()).longValue(), equalTo((long) ((i - 1) * interval + offset)));
            if (i == 0) {
                // first bucket
                long expectedFirstBucketCount = offset - 1;
                assertThat(bucket.getDocCount(), equalTo(expectedFirstBucketCount));
                docsCounted += expectedFirstBucketCount;
            } else if (i < expectedNumberOfBuckets - 1) {
                assertThat(bucket.getDocCount(), equalTo((long) interval));
                docsCounted += interval;
            } else {
                assertThat(bucket.getDocCount(), equalTo((long) numDocs - docsCounted));
            }
        }
    }
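The bucket-count expectation in singleValuedField_withRandomOffset is the subtle part; here is a worked instance with made-up numbers (numDocs, interval and numValueBuckets are hypothetical, chosen only to illustrate the two cases described in the comments above):

[source,java]
--------------------------------------------------
// hypothetical setup: 18 docs with values 1..18, interval 5
// without offset the buckets are 0, 5, 10, 15 -> numValueBuckets = 4,
// and the last of them holds 18 % 5 + 1 = 4 docs
int numDocs = 18, interval = 5, numValueBuckets = 4;
int offset = 2; // smaller than the 4 docs of the old last bucket -> an extra leading bucket [-3, 2) appears
int expectedNumberOfBuckets = (offset >= (numDocs % interval + 1)) ? numValueBuckets : numValueBuckets + 1; // 5
// with offset = 5 the boundaries line up with the unshifted ones again and the count stays at 4
--------------------------------------------------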