Fix date_nanos in composite aggs (backport of #53315) (#53347)

It looks like `date_nanos` fields weren't likely to work properly in
composite aggs because composites iterate field values using points, and
we weren't converting those points from nanoseconds into milliseconds.
Because the doc values were coming back in milliseconds, we ended up
getting very confused and just never collecting sub-aggregations.
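
To make the mismatch concrete, here is a minimal stand-alone sketch (mine, not
from the commit; the class name is made up) of why the raw point value never
lines up with the doc values:

    import org.apache.lucene.document.LongPoint;

    public class NanosPointMismatch {
        public static void main(String[] args) {
            long millis = 1_511_226_000_000L; // 2017-11-21T01:00:00Z in epoch millis
            byte[] point = new byte[Long.BYTES];
            // date_nanos indexes its point in nanoseconds since the epoch
            LongPoint.encodeDimension(millis * 1_000_000L, point, 0);

            long fromPoint = LongPoint.decodeDimension(point, 0); // nanoseconds
            // Doc values hand back milliseconds, so buckets keyed on the raw
            // point are off by a factor of 1,000,000 and never match.
            System.out.println(fromPoint == millis);              // false
            System.out.println(fromPoint / 1_000_000L == millis); // true once normalized
        }
    }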

This fixes that by adding a method, `parsePointAsMillis`, to
`DateFieldMapper.Resolution`. It is similar in name and function to
`NumberFieldMapper.NumberType`'s `parsePoint`, except that it normalizes
the decoded value to milliseconds, which is what aggs need at the moment.
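
As a rough usage sketch (the helper shape is assumed; it just mirrors the
`LongValuesSource` change in the diff below), a point-walking agg can now
route each point through the field's resolution instead of decoding it raw:

    import java.util.function.LongUnaryOperator;
    import java.util.function.ToLongFunction;

    import org.elasticsearch.index.mapper.DateFieldMapper;
    import org.elasticsearch.index.mapper.MappedFieldType;

    class PointBucketing { // illustrative wrapper, not an Elasticsearch class
        // Decode each packed point as epoch millis, then round it into a bucket key.
        static ToLongFunction<byte[]> toBucketFunction(MappedFieldType fieldType, LongUnaryOperator rounding) {
            ToLongFunction<byte[]> decode =
                ((DateFieldMapper.DateFieldType) fieldType).resolution()::parsePointAsMillis;
            return value -> rounding.applyAsLong(decode.applyAsLong(value));
        }
    }

Either resolution works transparently here: `MILLISECONDS` returns the decoded
value as-is and `NANOSECONDS` divides it down, so the rounding always sees millis.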

Closes #53168
Nik Everett 2020-03-11 13:00:07 -04:00 committed by GitHub
parent c408a34a21
commit 9ada508347
4 changed files with 83 additions and 8 deletions


@@ -886,6 +886,66 @@ setup:
   - match: { aggregations.test.buckets.0.key.date: "2017-10-21T00:00:00.000-02:00" }
   - match: { aggregations.test.buckets.0.doc_count: 2 }
 ---
+"date_histogram on date_nanos":
+  - skip:
+      version: " - 7.6.99"
+      reason: Fixed in 7.7.0
+  - do:
+      indices.create:
+        index: test_nanos
+        body:
+          mappings:
+            properties:
+              date_nanos:
+                type: date_nanos
+
+  - do:
+      index:
+        index: test_nanos
+        id: 7
+        body: { "date_nanos": "2017-11-21T01:00:00" }
+        refresh: true
+  - do:
+      index:
+        index: test_nanos
+        id: 8
+        body: { "date_nanos": "2017-11-22T01:00:00" }
+        refresh: true
+  - do:
+      index:
+        index: test_nanos
+        id: 9
+        body: { "date_nanos": "2017-11-22T02:00:00" }
+        refresh: true
+
+  - do:
+      search:
+        index: test_nanos
+        body:
+          aggregations:
+            test:
+              composite:
+                sources:
+                  - date:
+                      date_histogram:
+                        field: date_nanos
+                        calendar_interval: 1d
+                        format: iso8601 # Format makes the comparisons a little more obvious
+                aggregations:
+                  avg:
+                    avg:
+                      field: date_nanos
+
+  - match: { hits.total.value: 3 }
+  - match: { hits.total.relation: eq }
+  - length: { aggregations.test.buckets: 2 }
+  - match: { aggregations.test.buckets.0.key.date: "2017-11-21T00:00:00.000Z" }
+  - match: { aggregations.test.buckets.0.doc_count: 1 }
+  - match: { aggregations.test.buckets.0.avg.value_as_string: "2017-11-21T01:00:00.000Z" }
+  - match: { aggregations.test.buckets.1.key.date: "2017-11-22T00:00:00.000Z" }
+  - match: { aggregations.test.buckets.1.doc_count: 2 }
+  - match: { aggregations.test.buckets.1.avg.value_as_string: "2017-11-22T01:30:00.000Z" }
+
 ---
 "Terms source from sorted":
   - do:


@@ -94,6 +94,11 @@ public final class DateFieldMapper extends FieldMapper {
             public Instant clampToValidRange(Instant instant) {
                 return instant;
             }
+
+            @Override
+            public long parsePointAsMillis(byte[] value) {
+                return LongPoint.decodeDimension(value, 0);
+            }
         },
         NANOSECONDS("date_nanos", NumericType.DATE_NANOSECONDS) {
             @Override
@@ -110,6 +115,11 @@ public final class DateFieldMapper extends FieldMapper {
             public Instant clampToValidRange(Instant instant) {
                 return DateUtils.clampToNanosRange(instant);
             }
+
+            @Override
+            public long parsePointAsMillis(byte[] value) {
+                return DateUtils.toMilliSeconds(LongPoint.decodeDimension(value, 0));
+            }
         };
 
         private final String type;
@@ -138,8 +148,17 @@ public final class DateFieldMapper extends FieldMapper {
          */
         public abstract Instant toInstant(long value);
 
+        /**
+         * Return the instant that this range can represent that is closest to
+         * the provided instant.
+         */
         public abstract Instant clampToValidRange(Instant instant);
 
+        /**
+         * Decode the points representation of this field as milliseconds.
+         */
+        public abstract long parsePointAsMillis(byte[] value);
+
        public static Resolution ofOrdinal(int ord) {
            for (Resolution resolution : values()) {
                if (ord == resolution.ordinal()) {


@@ -269,7 +269,8 @@ class LongValuesSource extends SingleDimensionValuesSource<Long> {
             }
             return new PointsSortedDocsProducer(fieldType.name(), toBucketFunction, lowerPoint, upperPoint);
         } else if (fieldType instanceof DateFieldMapper.DateFieldType) {
-            final ToLongFunction<byte[]> toBucketFunction = (value) -> rounding.applyAsLong(LongPoint.decodeDimension(value, 0));
+            ToLongFunction<byte[]> decode = ((DateFieldMapper.DateFieldType) fieldType).resolution()::parsePointAsMillis;
+            ToLongFunction<byte[]> toBucketFunction = value -> rounding.applyAsLong(decode.applyAsLong(value));
             return new PointsSortedDocsProducer(fieldType.name(), toBucketFunction, lowerPoint, upperPoint);
         } else {
             return null;


@@ -18,15 +18,14 @@
  */
 package org.elasticsearch.search.aggregations.metrics;
 
-import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PointValues;
 import org.apache.lucene.search.CollectionTerminatedException;
 import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.util.Bits;
 import org.elasticsearch.common.lease.Releasables;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.DoubleArray;
@@ -191,11 +190,7 @@ class MinAggregator extends NumericMetricsAggregator.SingleValue {
             converter = ((NumberFieldMapper.NumberFieldType) fieldType)::parsePoint;
         } else if (fieldType.getClass() == DateFieldMapper.DateFieldType.class) {
             DateFieldMapper.DateFieldType dft = (DateFieldMapper.DateFieldType) fieldType;
-            /*
-             * Makes sure that nanoseconds decode to milliseconds, just
-             * like they do when you run the agg without the optimization.
-             */
-            converter = (in) -> dft.resolution().toInstant(LongPoint.decodeDimension(in, 0)).toEpochMilli();
+            converter = dft.resolution()::parsePointAsMillis;
         }
         return converter;
     }