Begin moving date_histogram to offset rounding (take two) (#51271) (#51495)

We added a new rounding in #50609 that handles offsets to the start and
end of each rounding interval so that we could support `offset` in the
`composite` aggregation. This starts moving `date_histogram` to that new
offset-aware rounding.

This is a redo of #50873 with more integration tests.

This reverts commit d114c9db3e1d1a766f9f48f846eed0466125ce83.
Author: Nik Everett, 2020-01-27 13:40:54 -05:00, committed by GitHub
parent faf6bb27be
commit 4ff314a9d5
10 changed files with 289 additions and 35 deletions
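For context, a minimal sketch (not part of this commit) of what the offset-aware Rounding from #50609 does. It assumes the Elasticsearch server classpath and assertions enabled (-ea); the two-hour offset mirrors the assertions in RoundingTests below.

import org.elasticsearch.common.Rounding;

public class OffsetRoundingSketch {
    public static void main(String[] args) {
        long oneDay = 24L * 60 * 60 * 1000;
        long twoHours = 2L * 60 * 60 * 1000;
        // Day-of-month rounding whose bucket boundaries are shifted by +2h.
        Rounding rounding = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH)
            .offset(twoHours)
            .build();
        // Midnight now falls into the bucket that starts at 02:00 the day before.
        assert rounding.round(0) == -oneDay + twoHours;
        assert rounding.round(twoHours) == twoHours;
    }
}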


@@ -196,3 +196,214 @@ setup:
- match: { aggregations.histo.buckets.2.key_as_string: "2016-01-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 2 }
---
"date_histogram":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0
- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards, which makes sure we get one empty shard.
number_of_shards: 5
mappings:
properties:
date:
type : date
- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"date": "2016-01-01"}'
- '{"index": {}}'
- '{"date": "2016-01-02"}'
- '{"index": {}}'
- '{"date": "2016-02-01"}'
- '{"index": {}}'
- '{"date": "2016-03-01"}'
- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date
calendar_interval: month
- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 2 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 1 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }
---
"date_histogram with offset":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0
- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards, which makes sure we get one empty shard.
number_of_shards: 5
mappings:
properties:
date:
type : date
- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"date": "2016-01-01"}'
- '{"index": {}}'
- '{"date": "2016-01-02"}'
- '{"index": {}}'
- '{"date": "2016-02-01"}'
- '{"index": {}}'
- '{"date": "2016-03-01"}'
- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: date
calendar_interval: month
offset: +1d
- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 1 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-01-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 2 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }
---
"date_histogram on range":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0
- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards, which makes sure we get one empty shard.
number_of_shards: 5
mappings:
properties:
range:
type : date_range
- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'
- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: range
calendar_interval: month
- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 2 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 1 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }
---
"date_histogram on range with offset":
- skip:
version: " - 7.1.99"
reason: calendar_interval introduced in 7.2.0
- do:
indices.create:
index: test_2
body:
settings:
# There was a BWC issue that only showed up on empty shards. This
# test has 4 docs and 5 shards, which makes sure we get one empty shard.
number_of_shards: 5
mappings:
properties:
range:
type : date_range
- do:
bulk:
index: test_2
refresh: true
body:
- '{"index": {}}'
- '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
- '{"index": {}}'
- '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'
- do:
search:
body:
size: 0
aggs:
histo:
date_histogram:
field: range
calendar_interval: month
offset: +1d
- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2015-12-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.doc_count: 1 }
- match: { aggregations.histo.buckets.1.key_as_string: "2016-01-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.1.doc_count: 2 }
- match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.doc_count: 1 }
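The bucket keys asserted above follow directly from the offset rounding. A sketch checking the first expectation of the offset test (hypothetical class; assumes the server classpath and that `calendar_interval: month` maps to DateTimeUnit.MONTH_OF_YEAR):

import java.time.Instant;
import org.elasticsearch.common.Rounding;

public class MonthOffsetSketch {
    public static void main(String[] args) {
        long oneDay = 24L * 60 * 60 * 1000;
        // Month rounding shifted by the tests' "+1d" offset.
        Rounding rounding = Rounding.builder(Rounding.DateTimeUnit.MONTH_OF_YEAR)
            .offset(oneDay)
            .build();
        long doc = Instant.parse("2016-01-01T00:00:00Z").toEpochMilli();
        // 2016-01-01 precedes the shifted boundary 2016-01-02, so it lands in
        // the bucket keyed 2015-12-02, matching buckets.0 above.
        assert Instant.ofEpochMilli(rounding.round(doc))
            .equals(Instant.parse("2015-12-02T00:00:00Z"));
    }
}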


@@ -164,6 +164,19 @@ public abstract class Rounding implements Writeable {
*/
public abstract long nextRoundingValue(long value);
/**
* How "offset" this rounding is from the traditional "start" of the period.
* @deprecated We're in the process of abstracting offset *into* Rounding
* so keep any usage to migratory shims
*/
@Deprecated
public abstract long offset();
/**
* Strip the {@code offset} from these bounds.
*/
public abstract Rounding withoutOffset();
@Override
public abstract boolean equals(Object obj);
@@ -425,6 +438,16 @@ public abstract class Rounding implements Writeable {
}
}
@Override
public long offset() {
return 0;
}
@Override
public Rounding withoutOffset() {
return this;
}
@Override
public int hashCode() {
return Objects.hash(unit, timeZone);
@@ -556,6 +579,16 @@ public abstract class Rounding implements Writeable {
.toInstant().toEpochMilli();
}
@Override
public long offset() {
return 0;
}
@Override
public Rounding withoutOffset() {
return this;
}
@Override
public int hashCode() {
return Objects.hash(interval, timeZone);
@@ -617,8 +650,17 @@
@Override
public long nextRoundingValue(long value) {
- // This isn't needed by the current users. We'll implement it when we migrate other users to it.
- throw new UnsupportedOperationException("not yet supported");
+ return delegate.nextRoundingValue(value - offset) + offset;
}
@Override
public long offset() {
return offset;
}
@Override
public Rounding withoutOffset() {
return delegate;
}
@Override
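Taken together, the new methods give the offset rounding a small contract: offset() exposes the shift, withoutOffset() returns the plain delegate, and nextRoundingValue now works by shifting into the delegate's frame and back. A sketch of that contract (illustrative values, mirroring RoundingTests below):

import org.elasticsearch.common.Rounding;

public class OffsetContractSketch {
    public static void main(String[] args) {
        long oneDay = 24L * 60 * 60 * 1000;
        long twoHours = 2L * 60 * 60 * 1000;
        Rounding rounding = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH)
            .offset(twoHours)
            .build();
        assert rounding.offset() == twoHours;           // the migratory shim
        assert rounding.withoutOffset().round(0) == 0L; // plain day rounding again
        assert rounding.withoutOffset().nextRoundingValue(0) == oneDay;
        assert rounding.nextRoundingValue(0) == twoHours; // next shifted boundary
    }
}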


@@ -499,14 +499,13 @@ public class DateHistogramAggregationBuilder extends ValuesSourceAggregationBuilder
AggregatorFactory parent,
Builder subFactoriesBuilder) throws IOException {
final ZoneId tz = timeZone();
- // TODO use offset here rather than explicitly in the aggregation
- final Rounding rounding = dateHistogramInterval.createRounding(tz, 0);
+ final Rounding rounding = dateHistogramInterval.createRounding(tz, offset);
final ZoneId rewrittenTimeZone = rewriteTimeZone(queryShardContext);
final Rounding shardRounding;
if (tz == rewrittenTimeZone) {
shardRounding = rounding;
} else {
- shardRounding = dateHistogramInterval.createRounding(rewrittenTimeZone, 0);
+ shardRounding = dateHistogramInterval.createRounding(rewrittenTimeZone, offset);
}
ExtendedBounds roundedBounds = null;
@@ -514,7 +513,7 @@ public class DateHistogramAggregationBuilder extends ValuesSourceAggregationBuilder
// parse any string bounds to longs and round
roundedBounds = this.extendedBounds.parseAndValidate(name, queryShardContext, config.format()).round(rounding);
}
- return new DateHistogramAggregatorFactory(name, config, offset, order, keyed, minDocCount,
+ return new DateHistogramAggregatorFactory(name, config, order, keyed, minDocCount,
rounding, shardRounding, roundedBounds, queryShardContext, parent, subFactoriesBuilder, metaData);
}


@@ -64,10 +64,9 @@ class DateHistogramAggregator extends BucketsAggregator {
private final ExtendedBounds extendedBounds;
private final LongHash bucketOrds;
- private long offset;
DateHistogramAggregator(String name, AggregatorFactories factories, Rounding rounding, Rounding shardRounding,
- long offset, BucketOrder order, boolean keyed,
+ BucketOrder order, boolean keyed,
long minDocCount, @Nullable ExtendedBounds extendedBounds, @Nullable ValuesSource.Numeric valuesSource,
DocValueFormat formatter, SearchContext aggregationContext,
Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
@@ -75,7 +74,6 @@ class DateHistogramAggregator extends BucketsAggregator {
super(name, factories, aggregationContext, parent, pipelineAggregators, metaData);
this.rounding = rounding;
this.shardRounding = shardRounding;
- this.offset = offset;
this.order = InternalOrder.validate(order, this);
this.keyed = keyed;
this.minDocCount = minDocCount;
@@ -113,7 +111,7 @@ class DateHistogramAggregator extends BucketsAggregator {
long value = values.nextValue();
// We can use shardRounding here, which is sometimes more efficient
// if daylight saving times are involved.
- long rounded = shardRounding.round(value - offset) + offset;
+ long rounded = shardRounding.round(value);
assert rounded >= previousRounded;
if (rounded == previousRounded) {
continue;
@@ -148,9 +146,9 @@ class DateHistogramAggregator extends BucketsAggregator {
// value source will be null for unmapped fields
// Important: use `rounding` here, not `shardRounding`
InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
- ? new InternalDateHistogram.EmptyBucketInfo(rounding, buildEmptySubAggregations(), extendedBounds)
+ ? new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds)
: null;
- return new InternalDateHistogram(name, buckets, order, minDocCount, offset, emptyBucketInfo, formatter, keyed,
+ return new InternalDateHistogram(name, buckets, order, minDocCount, rounding.offset(), emptyBucketInfo, formatter, keyed,
pipelineAggregators(), metaData());
}
@@ -159,8 +157,8 @@ class DateHistogramAggregator extends BucketsAggregator {
InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
? new InternalDateHistogram.EmptyBucketInfo(rounding, buildEmptySubAggregations(), extendedBounds)
: null;
- return new InternalDateHistogram(name, Collections.emptyList(), order, minDocCount, offset, emptyBucketInfo, formatter, keyed,
- pipelineAggregators(), metaData());
+ return new InternalDateHistogram(name, Collections.emptyList(), order, minDocCount, rounding.offset(), emptyBucketInfo, formatter,
+ keyed, pipelineAggregators(), metaData());
}
@Override
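The collect path above relies on the offset-aware rounding reproducing the removed round(value - offset) + offset shim exactly. A sketch of that equivalence (illustrative offset and timestamp, assumes the server classpath):

import org.elasticsearch.common.Rounding;

public class ShimEquivalenceSketch {
    public static void main(String[] args) {
        long offset = 2L * 60 * 60 * 1000; // +2h, illustrative
        Rounding plain = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build();
        Rounding withOffset = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH)
            .offset(offset)
            .build();
        long value = 1579514400000L; // 2020-01-20T10:00:00Z, illustrative
        // The old manual shim and the new offset-aware call agree.
        assert withOffset.round(value) == plain.round(value - offset) + offset;
    }
}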


@@ -39,7 +39,6 @@ import java.util.Map;
public final class DateHistogramAggregatorFactory
extends ValuesSourceAggregatorFactory<ValuesSource> {
- private final long offset;
private final BucketOrder order;
private final boolean keyed;
private final long minDocCount;
@@ -48,12 +47,11 @@ public final class DateHistogramAggregatorFactory
private final Rounding shardRounding;
public DateHistogramAggregatorFactory(String name, ValuesSourceConfig<ValuesSource> config,
- long offset, BucketOrder order, boolean keyed, long minDocCount,
+ BucketOrder order, boolean keyed, long minDocCount,
Rounding rounding, Rounding shardRounding, ExtendedBounds extendedBounds, QueryShardContext queryShardContext,
AggregatorFactory parent, AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metaData) throws IOException {
super(name, config, queryShardContext, parent, subFactoriesBuilder, metaData);
- this.offset = offset;
this.order = order;
this.keyed = keyed;
this.minDocCount = minDocCount;
@@ -104,7 +102,7 @@ public final class DateHistogramAggregatorFactory
private Aggregator createAggregator(ValuesSource.Numeric valuesSource, SearchContext searchContext,
Aggregator parent, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
- return new DateHistogramAggregator(name, factories, rounding, shardRounding, offset, order, keyed, minDocCount, extendedBounds,
+ return new DateHistogramAggregator(name, factories, rounding, shardRounding, order, keyed, minDocCount, extendedBounds,
valuesSource, config.format(), searchContext, parent, pipelineAggregators, metaData);
}
@@ -113,7 +111,7 @@ public final class DateHistogramAggregatorFactory
Aggregator parent,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
- return new DateRangeHistogramAggregator(name, factories, rounding, shardRounding, offset, order, keyed, minDocCount, extendedBounds,
+ return new DateRangeHistogramAggregator(name, factories, rounding, shardRounding, order, keyed, minDocCount, extendedBounds,
valuesSource, config.format(), searchContext, parent, pipelineAggregators, metaData);
}


@@ -67,10 +67,9 @@ class DateRangeHistogramAggregator extends BucketsAggregator {
private final ExtendedBounds extendedBounds;
private final LongHash bucketOrds;
- private long offset;
DateRangeHistogramAggregator(String name, AggregatorFactories factories, Rounding rounding, Rounding shardRounding,
- long offset, BucketOrder order, boolean keyed,
+ BucketOrder order, boolean keyed,
long minDocCount, @Nullable ExtendedBounds extendedBounds, @Nullable ValuesSource.Range valuesSource,
DocValueFormat formatter, SearchContext aggregationContext,
Aggregator parent, List<PipelineAggregator> pipelineAggregators,
@@ -79,7 +78,6 @@ class DateRangeHistogramAggregator extends BucketsAggregator {
super(name, factories, aggregationContext, parent, pipelineAggregators, metaData);
this.rounding = rounding;
this.shardRounding = shardRounding;
- this.offset = offset;
this.order = InternalOrder.validate(order, this);
this.keyed = keyed;
this.minDocCount = minDocCount;
@@ -126,8 +124,8 @@ class DateRangeHistogramAggregator extends BucketsAggregator {
// The encoding should ensure that this assert is always true.
assert from >= previousFrom : "Start of range not >= previous start";
final Long to = (Long) range.getTo();
- final long startKey = offsetAwareRounding(shardRounding, from, offset);
- final long endKey = offsetAwareRounding(shardRounding, to, offset);
+ final long startKey = shardRounding.round(from);
+ final long endKey = shardRounding.round(to);
for (long key = startKey > previousKey ? startKey : previousKey; key <= endKey;
key = shardRounding.nextRoundingValue(key)) {
if (key == previousKey) {
@@ -153,10 +151,6 @@ class DateRangeHistogramAggregator extends BucketsAggregator {
};
}
- private long offsetAwareRounding(Rounding rounding, long value, long offset) {
- return rounding.round(value - offset) + offset;
- }
@Override
public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException {
assert owningBucketOrdinal == 0;
@@ -173,9 +167,9 @@ class DateRangeHistogramAggregator extends BucketsAggregator {
// value source will be null for unmapped fields
// Important: use `rounding` here, not `shardRounding`
InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
- ? new InternalDateHistogram.EmptyBucketInfo(rounding, buildEmptySubAggregations(), extendedBounds)
+ ? new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds)
: null;
- return new InternalDateHistogram(name, buckets, order, minDocCount, offset, emptyBucketInfo, formatter, keyed,
+ return new InternalDateHistogram(name, buckets, order, minDocCount, rounding.offset(), emptyBucketInfo, formatter, keyed,
pipelineAggregators(), metaData());
}
@@ -184,8 +178,8 @@ class DateRangeHistogramAggregator extends BucketsAggregator {
InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
? new InternalDateHistogram.EmptyBucketInfo(rounding, buildEmptySubAggregations(), extendedBounds)
: null;
- return new InternalDateHistogram(name, Collections.emptyList(), order, minDocCount, offset, emptyBucketInfo, formatter, keyed,
- pipelineAggregators(), metaData());
+ return new InternalDateHistogram(name, Collections.emptyList(), order, minDocCount, rounding.offset(), emptyBucketInfo, formatter,
+ keyed, pipelineAggregators(), metaData());
}
@Override
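Unlike the plain date_histogram, the range aggregator walks bucket keys with nextRoundingValue, which is why OffsetRounding gained a real implementation of it in Rounding.java above. A sketch of that walk (day buckets with a two-hour offset; values illustrative):

import org.elasticsearch.common.Rounding;

public class BucketWalkSketch {
    public static void main(String[] args) {
        long oneDay = 24L * 60 * 60 * 1000;
        long twoHours = 2L * 60 * 60 * 1000;
        Rounding rounding = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH)
            .offset(twoHours)
            .build();
        long from = 0L;        // illustrative range [from, to]
        long to = 3 * oneDay;
        long startKey = rounding.round(from);
        long endKey = rounding.round(to);
        // Same loop shape as DateRangeHistogramAggregator: one key per bucket.
        for (long key = startKey; key <= endKey; key = rounding.nextRoundingValue(key)) {
            System.out.println("bucket key: " + key);
        }
    }
}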


@@ -166,7 +166,11 @@ public class ExtendedBounds implements ToXContentFragment, Writeable {
}
ExtendedBounds round(Rounding rounding) {
- return new ExtendedBounds(min != null ? rounding.round(min) : null, max != null ? rounding.round(max) : null);
+ // Extended bounds shouldn't be affected by the offset
+ Rounding effectiveRounding = rounding.withoutOffset();
+ return new ExtendedBounds(
+     min != null ? effectiveRounding.round(min) : null,
+     max != null ? effectiveRounding.round(max) : null);
}
@Override
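A sketch of why round strips the offset (illustrative bound): extended bounds should snap to plain interval starts even when bucket keys are shifted.

import org.elasticsearch.common.Rounding;

public class BoundsRoundingSketch {
    public static void main(String[] args) {
        long twoHours = 2L * 60 * 60 * 1000;
        long oneHour = 60 * 60 * 1000L;
        Rounding rounding = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH)
            .offset(twoHours)
            .build();
        // With the offset, a 01:00 bound would snap into the previous day's
        // shifted bucket; without it, it snaps to midnight as intended.
        assert rounding.withoutOffset().round(oneHour) == 0L;
        assert rounding.round(oneHour) != rounding.withoutOffset().round(oneHour);
    }
}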


@@ -201,10 +201,18 @@ public class RoundingTests extends ESTestCase {
Rounding rounding = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).offset(twoHours).build();
assertThat(rounding.round(0), equalTo(-oneDay + twoHours));
assertThat(rounding.round(twoHours), equalTo(twoHours));
assertThat(rounding.nextRoundingValue(-oneDay), equalTo(-oneDay + twoHours));
assertThat(rounding.nextRoundingValue(0), equalTo(twoHours));
assertThat(rounding.withoutOffset().round(0), equalTo(0L));
assertThat(rounding.withoutOffset().nextRoundingValue(0), equalTo(oneDay));
rounding = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).offset(-twoHours).build();
assertThat(rounding.round(0), equalTo(-twoHours));
assertThat(rounding.round(oneDay - twoHours), equalTo(oneDay - twoHours));
assertThat(rounding.nextRoundingValue(-oneDay), equalTo(-twoHours));
assertThat(rounding.nextRoundingValue(0), equalTo(oneDay - twoHours));
assertThat(rounding.withoutOffset().round(0), equalTo(0L));
assertThat(rounding.withoutOffset().nextRoundingValue(0), equalTo(oneDay));
}
/**


@@ -164,7 +164,7 @@ public class PipelineAggregationHelperTests extends ESTestCase {
new AggregatorFactories.Builder(), Collections.emptyMap());
break;
case 1:
- factory = new DateHistogramAggregatorFactory("name", mock(ValuesSourceConfig.class), 0L,
+ factory = new DateHistogramAggregatorFactory("name", mock(ValuesSourceConfig.class),
mock(InternalOrder.class), false, 0L, mock(Rounding.class), mock(Rounding.class),
mock(ExtendedBounds.class), mock(QueryShardContext.class), mock(AggregatorFactory.class),
new AggregatorFactories.Builder(), Collections.emptyMap());


@@ -131,7 +131,7 @@ public class CumulativeCardinalityAggregatorTests extends AggregatorTestCase {
// Date Histogram
aggBuilders.clear();
aggBuilders.add(new CumulativeCardinalityPipelineAggregationBuilder("cumulative_card", "sum"));
- parent = new DateHistogramAggregatorFactory("name", valuesSourceConfig, 0L,
+ parent = new DateHistogramAggregatorFactory("name", valuesSourceConfig,
mock(InternalOrder.class), false, 0L, mock(Rounding.class), mock(Rounding.class),
mock(ExtendedBounds.class), mock(QueryShardContext.class), mock(AggregatorFactory.class),
new AggregatorFactories.Builder(), Collections.emptyMap());