Add date and date_nanos conversion to the numeric_type sort option (#40199) (#40224)

This change adds an option to convert a `date` field to nanoseconds resolution
 and a `date_nanos` field to millisecond resolution when sorting.
The resolution of the sort can be set using the `numeric_type` option of the
field sort builder. The conversion is done at the shard level and is restricted
to dates from 1970 to 2262 for the nanoseconds resolution in order to avoid
numeric overflow.
This commit is contained in:
Jim Ferenczi 2019-03-20 16:50:28 +01:00 committed by GitHub
parent 5eb33f2df4
commit 3400483af4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 305 additions and 48 deletions

View File

@ -112,8 +112,8 @@ POST /_search
For numeric fields it is also possible to cast the values from one type
to another using the `numeric_type` option.
This option accepts the following values: [`"double", "long"`] and can be useful
for cross-index search if the sort field is mapped differently on some
This option accepts the following values: [`"double", "long", "date", "date_nanos"`]
and can be useful for cross-index search if the sort field is mapped differently on some
indices.
Consider for instance these two indices:
@ -175,6 +175,63 @@ but note that in this case floating points are replaced by the largest
value that is less than or equal (greater than or equal if the value
is negative) to the argument and is equal to a mathematical integer.
This option can also be used to convert a `date` field that uses millisecond
resolution to a `date_nanos` field with nanosecond resolution.
Consider for instance these two indices:
[source,js]
--------------------------------------------------
PUT /index_double
{
"mappings": {
"properties": {
"field": { "type": "date" }
}
}
}
--------------------------------------------------
// CONSOLE
[source,js]
--------------------------------------------------
PUT /index_long
{
"mappings": {
"properties": {
"field": { "type": "date_nanos" }
}
}
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
Values in these indices are stored with different resolutions, so sorting on these
fields will always sort the `date` values before the `date_nanos` values (ascending order).
With the `numeric_type` option it is possible to set a single resolution for
the sort: setting it to `date` converts the `date_nanos` values to millisecond resolution,
while setting it to `date_nanos` converts the values in the `date` field to nanosecond resolution:
[source,js]
--------------------------------------------------
POST /index_long,index_double/_search
{
"sort" : [
{
"field" : {
"numeric_type" : "date_nanos"
}
}
]
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
[WARNING]
To avoid overflow, the conversion to `date_nanos` cannot be applied to dates before
1970 or after 2262, as nanoseconds since the epoch are represented as longs.
[[nested-sorting]]
==== Sorting within nested objects.

View File

@ -89,6 +89,8 @@ public class DateUtils {
private static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");
static final long MAX_NANOSECOND_IN_MILLIS = MAX_NANOSECOND_INSTANT.toEpochMilli();
/**
* convert a java time instant to a long value which is stored in lucene
* the long value resembles the nanoseconds since the epoch
@ -117,7 +119,7 @@ public class DateUtils {
*/
public static Instant toInstant(long nanoSecondsSinceEpoch) {
if (nanoSecondsSinceEpoch < 0) {
throw new IllegalArgumentException("nanoseconds are [" + nanoSecondsSinceEpoch + "] are before the epoch in 1970 and cannot " +
throw new IllegalArgumentException("nanoseconds [" + nanoSecondsSinceEpoch + "] are before the epoch in 1970 and cannot " +
"be processed in nanosecond resolution");
}
if (nanoSecondsSinceEpoch == 0) {
@ -129,6 +131,24 @@ public class DateUtils {
return Instant.ofEpochSecond(seconds, nanos);
}
/**
 * Convert a millisecond timestamp to nanoseconds.
 *
 * @param milliSecondsSinceEpoch the milliseconds since the epoch
 * @return the nanoseconds since the epoch
 * @throws IllegalArgumentException if the value is negative (before the epoch in 1970) or
 *         greater than {@code MAX_NANOSECOND_IN_MILLIS}
 */
public static long toNanoSeconds(long milliSecondsSinceEpoch) {
    final boolean beforeEpoch = milliSecondsSinceEpoch < 0;
    if (beforeEpoch) {
        throw new IllegalArgumentException("milliSeconds [" + milliSecondsSinceEpoch + "] are before the epoch in 1970 and cannot " +
            "be converted to nanoseconds");
    }
    final boolean afterMax = milliSecondsSinceEpoch > MAX_NANOSECOND_IN_MILLIS;
    if (afterMax) {
        throw new IllegalArgumentException("milliSeconds [" + milliSecondsSinceEpoch + "] are after 2262-04-11T23:47:16.854775807 " +
            "and cannot be converted to nanoseconds");
    }
    // the input is now within [0, MAX_NANOSECOND_IN_MILLIS], so the product fits in a long
    final long nanosPerMilli = 1_000_000;
    return milliSecondsSinceEpoch * nanosPerMilli;
}
/**
* Convert a nanosecond timestamp in milliseconds
*
@ -137,7 +157,7 @@ public class DateUtils {
*/
public static long toMilliSeconds(long nanoSecondsSinceEpoch) {
if (nanoSecondsSinceEpoch < 0) {
throw new IllegalArgumentException("nanoseconds are [" + nanoSecondsSinceEpoch + "] are before the epoch in 1970 and will " +
throw new IllegalArgumentException("nanoseconds are [" + nanoSecondsSinceEpoch + "] are before the epoch in 1970 and cannot " +
"be converted to milliseconds");
}

View File

@ -26,12 +26,15 @@ import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BitSet;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.plain.SortedNumericDVIndexFieldData;
import org.elasticsearch.search.MultiValueMode;
import java.io.IOException;
import java.util.function.Function;
/**
* Comparator source for long values.
@ -39,11 +42,19 @@ import java.io.IOException;
public class LongValuesComparatorSource extends IndexFieldData.XFieldComparatorSource {
private final IndexNumericFieldData indexFieldData;
private final Function<SortedNumericDocValues, SortedNumericDocValues> converter;
public LongValuesComparatorSource(IndexNumericFieldData indexFieldData, @Nullable Object missingValue, MultiValueMode sortMode,
Nested nested) {
public LongValuesComparatorSource(IndexNumericFieldData indexFieldData, @Nullable Object missingValue,
MultiValueMode sortMode, Nested nested) {
this(indexFieldData, missingValue, sortMode, nested, null);
}
/**
 * Creates a comparator source for long values that can transform the loaded doc values
 * before they are compared.
 *
 * @param indexFieldData the numeric field data the doc values are loaded from
 * @param missingValue   forwarded to the parent comparator source
 * @param sortMode       forwarded to the parent comparator source
 * @param nested         forwarded to the parent comparator source
 * @param converter      function applied to the loaded doc values; may be <code>null</code>,
 *                       in which case the values are used as loaded
 */
public LongValuesComparatorSource(IndexNumericFieldData indexFieldData, @Nullable Object missingValue,
                                  MultiValueMode sortMode, Nested nested,
                                  Function<SortedNumericDocValues, SortedNumericDocValues> converter) {
    super(missingValue, sortMode, nested);
    this.converter = converter;
    this.indexFieldData = indexFieldData;
}
@Override
@ -51,6 +62,17 @@ public class LongValuesComparatorSource extends IndexFieldData.XFieldComparatorS
return SortField.Type.LONG;
}
/**
 * Loads the long doc values of the sort field for the given segment and applies the
 * optional <code>converter</code> to them.
 *
 * Field data of type {@link SortedNumericDVIndexFieldData.NanoSecondFieldData} is read through
 * <code>getLongValuesAsNanos()</code> instead of <code>getLongValues()</code>, so that the
 * converter (if any) receives the nanosecond values.
 */
private SortedNumericDocValues loadDocValues(LeafReaderContext context) {
    final AtomicNumericFieldData fieldData = indexFieldData.load(context);
    final SortedNumericDocValues loaded = fieldData instanceof SortedNumericDVIndexFieldData.NanoSecondFieldData
        ? ((SortedNumericDVIndexFieldData.NanoSecondFieldData) fieldData).getLongValuesAsNanos()
        : fieldData.getLongValues();
    if (converter == null) {
        // no conversion requested: sort on the values as loaded
        return loaded;
    }
    return converter.apply(loaded);
}
@Override
public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());
@ -61,7 +83,7 @@ public class LongValuesComparatorSource extends IndexFieldData.XFieldComparatorS
return new FieldComparator.LongComparator(numHits, null, null) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
final SortedNumericDocValues values = indexFieldData.load(context).getLongValues();
final SortedNumericDocValues values = loadDocValues(context);
final NumericDocValues selectedValues;
if (nested == null) {
selectedValues = FieldData.replaceMissing(sortMode.select(values), dMissingValue);

View File

@ -48,6 +48,7 @@ import org.elasticsearch.search.MultiValueMode;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.function.LongUnaryOperator;
/**
* FieldData backed by {@link LeafReader#getSortedNumericDocValues(String)}
@ -69,8 +70,8 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
* Values are casted to the provided <code>targetNumericType</code> type if it doesn't
* match the field's <code>numericType</code>.
*/
public SortField sortField(NumericType targetNumericType, Object missingValue, MultiValueMode sortMode, Nested nested,
boolean reverse) {
public SortField sortField(NumericType targetNumericType, Object missingValue, MultiValueMode sortMode,
Nested nested, boolean reverse) {
final XFieldComparatorSource source;
switch (targetNumericType) {
case HALF_FLOAT:
@ -82,6 +83,26 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
source = new DoubleValuesComparatorSource(this, missingValue, sortMode, nested);
break;
case DATE:
if (numericType == NumericType.DATE_NANOSECONDS) {
// converts date_nanos values to millisecond resolution
source = new LongValuesComparatorSource(this, missingValue,
sortMode, nested, dvs -> convertNanosToMillis(dvs));
} else {
source = new LongValuesComparatorSource(this, missingValue, sortMode, nested);
}
break;
case DATE_NANOSECONDS:
if (numericType == NumericType.DATE) {
// converts date values to nanosecond resolution
source = new LongValuesComparatorSource(this, missingValue,
sortMode, nested, dvs -> convertMillisToNanos(dvs));
} else {
source = new LongValuesComparatorSource(this, missingValue, sortMode, nested);
}
break;
default:
assert !targetNumericType.isFloatingPoint();
source = new LongValuesComparatorSource(this, missingValue, sortMode, nested);
@ -93,9 +114,9 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
* returns a custom sort field otherwise.
*/
if (nested != null
|| (sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN)
|| numericType == NumericType.HALF_FLOAT
|| targetNumericType != numericType) {
|| (sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN)
|| numericType == NumericType.HALF_FLOAT
|| targetNumericType != numericType) {
return new SortField(fieldName, source, reverse);
}
@ -171,29 +192,7 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
@Override
public SortedNumericDocValues getLongValues() {
final SortedNumericDocValues dv = getLongValuesAsNanos();
return new AbstractSortedNumericDocValues() {
@Override
public boolean advanceExact(int target) throws IOException {
return dv.advanceExact(target);
}
@Override
public long nextValue() throws IOException {
return DateUtils.toMilliSeconds(dv.nextValue());
}
@Override
public int docValueCount() {
return dv.docValueCount();
}
@Override
public int nextDoc() throws IOException {
return dv.nextDoc();
}
};
return convertNanosToMillis(getLongValuesAsNanos());
}
public SortedNumericDocValues getLongValuesAsNanos() {
@ -463,4 +462,47 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
return Collections.emptyList();
}
}
/**
 * Returns a view over <code>dvs</code> in which every value is converted from nanosecond
 * to millisecond resolution with {@link DateUtils#toMilliSeconds(long)}.
 */
static SortedNumericDocValues convertNanosToMillis(SortedNumericDocValues dvs) {
    final LongUnaryOperator nanosToMillis = DateUtils::toMilliSeconds;
    return convertNumeric(dvs, nanosToMillis);
}
/**
 * Returns a view over <code>values</code> in which every value is converted from millisecond
 * to nanosecond resolution with {@link DateUtils#toNanoSeconds(long)}.
 */
static SortedNumericDocValues convertMillisToNanos(SortedNumericDocValues values) {
    final LongUnaryOperator millisToNanos = DateUtils::toNanoSeconds;
    return convertNumeric(values, millisToNanos);
}
/**
 * Wraps <code>values</code> so that each value it yields is first passed through the
 * provided <code>converter</code>. Iteration and per-document value counts are delegated
 * to <code>values</code> unchanged; the conversion happens on each call to
 * <code>nextValue()</code>.
 */
private static SortedNumericDocValues convertNumeric(SortedNumericDocValues values, LongUnaryOperator converter) {
    return new AbstractSortedNumericDocValues() {

        @Override
        public int nextDoc() throws IOException {
            return values.nextDoc();
        }

        @Override
        public boolean advanceExact(int target) throws IOException {
            return values.advanceExact(target);
        }

        @Override
        public int docValueCount() {
            return values.docValueCount();
        }

        @Override
        public long nextValue() throws IOException {
            final long original = values.nextValue();
            return converter.applyAsLong(original);
        }
    };
}
}

View File

@ -176,7 +176,7 @@ public final class DateFieldMapper extends FieldMapper {
return this;
}
Builder withResolution(Resolution resolution) {
public Builder withResolution(Resolution resolution) {
this.resolution = resolution;
return this;
}

View File

@ -304,16 +304,19 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
* Allowed values are <code>long</code>, <code>double</code>, <code>date</code> and <code>date_nanos</code>.
*/
public FieldSortBuilder setNumericType(String numericType) {
String upperCase = numericType.toUpperCase(Locale.ENGLISH);
switch (upperCase) {
case "LONG":
case "DOUBLE":
String lowerCase = numericType.toLowerCase(Locale.ENGLISH);
switch (lowerCase) {
case "long":
case "double":
case "date":
case "date_nanos":
break;
default:
throw new IllegalArgumentException("invalid value for [numeric_type], must be [LONG, DOUBLE], got " + numericType);
throw new IllegalArgumentException("invalid value for [numeric_type], " +
"must be [long, double, date, date_nanos], got " + lowerCase);
}
this.numericType = upperCase;
this.numericType = lowerCase;
return this;
}
@ -348,6 +351,23 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
return builder;
}
/**
 * Maps the textual value of the [numeric_type] option to the matching {@link NumericType}.
 * Only the lower-case names <code>long</code>, <code>double</code>, <code>date</code> and
 * <code>date_nanos</code> are recognized.
 *
 * @throws IllegalArgumentException if <code>value</code> is not one of the recognized names
 */
private static NumericType resolveNumericType(String value) {
    final NumericType resolved;
    switch (value) {
        case "long":
            resolved = NumericType.LONG;
            break;
        case "double":
            resolved = NumericType.DOUBLE;
            break;
        case "date":
            resolved = NumericType.DATE;
            break;
        case "date_nanos":
            resolved = NumericType.DATE_NANOSECONDS;
            break;
        default:
            throw new IllegalArgumentException("invalid value for [numeric_type], " +
                "must be [long, double, date, date_nanos], got " + value);
    }
    return resolved;
}
@Override
public SortFieldAndFormat build(QueryShardContext context) throws IOException {
if (DOC_FIELD_NAME.equals(fieldName)) {
@ -404,7 +424,7 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
"[numeric_type] option cannot be set on a non-numeric field, got " + fieldType.typeName());
}
SortedNumericDVIndexFieldData numericFieldData = (SortedNumericDVIndexFieldData) fieldData;
NumericType resolvedType = NumericType.valueOf(numericType);
NumericType resolvedType = resolveNumericType(numericType);
field = numericFieldData.sortField(resolvedType, missing, localSortMode, nested, reverse);
} else {
field = fieldData.sortField(missing, localSortMode, nested, reverse);

View File

@ -38,6 +38,7 @@ import java.util.Set;
import static org.elasticsearch.common.time.DateUtils.toInstant;
import static org.elasticsearch.common.time.DateUtils.toLong;
import static org.elasticsearch.common.time.DateUtils.toMilliSeconds;
import static org.elasticsearch.common.time.DateUtils.toNanoSeconds;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
@ -97,11 +98,11 @@ public class DateUtilsTests extends ESTestCase {
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> toInstant(-1));
assertThat(e.getMessage(),
is("nanoseconds are [-1] are before the epoch in 1970 and cannot be processed in nanosecond resolution"));
is("nanoseconds [-1] are before the epoch in 1970 and cannot be processed in nanosecond resolution"));
e = expectThrows(IllegalArgumentException.class, () -> toInstant(Long.MIN_VALUE));
assertThat(e.getMessage(),
is("nanoseconds are [" + Long.MIN_VALUE + "] are before the epoch in 1970 and cannot be processed in nanosecond resolution"));
is("nanoseconds [" + Long.MIN_VALUE + "] are before the epoch in 1970 and cannot be processed in nanosecond resolution"));
assertThat(toInstant(Long.MAX_VALUE),
is(ZonedDateTime.parse("2262-04-11T23:47:16.854775807Z").toInstant()));
@ -115,6 +116,22 @@ public class DateUtilsTests extends ESTestCase {
assertThat(toMilliSeconds(nowInNs), is(instant.toEpochMilli()));
}
/**
 * Checks {@code toNanoSeconds} for zero, for a random in-range instant, for the largest
 * convertible value, and for values on either side of the supported range.
 */
public void testMillisToNanos() {
    assertThat(toNanoSeconds(0), equalTo(0L));

    // a random instant with whole-second precision that fits in nanosecond resolution
    Instant instant = Instant.ofEpochSecond(randomLongBetween(0, Long.MAX_VALUE) / 1_000_000_000L);
    long nowInMs = instant.toEpochMilli();
    assertThat(toNanoSeconds(nowInMs), equalTo(toLong(instant)));

    IllegalArgumentException exc =
        expectThrows(IllegalArgumentException.class, () -> toNanoSeconds(-1));
    assertThat(exc.getMessage(), containsString("before the epoch"));

    // the upper bound itself is still convertible: only values strictly above it are rejected
    assertThat(toNanoSeconds(DateUtils.MAX_NANOSECOND_IN_MILLIS),
        equalTo(DateUtils.MAX_NANOSECOND_IN_MILLIS * 1_000_000L));

    // the offset starts at 1 (randomLongBetween is inclusive); an offset of 0 would hit the
    // valid upper bound and make expectThrows fail
    long millis = DateUtils.MAX_NANOSECOND_IN_MILLIS + randomLongBetween(1, 1000000);
    exc = expectThrows(IllegalArgumentException.class, () -> toNanoSeconds(millis));
    assertThat(exc.getMessage(), containsString("after 2262"));
}
private Instant createRandomInstant() {
long seconds = randomLongBetween(0, Long.MAX_VALUE) / 1_000_000_000L;
long nanos = randomLongBetween(0, 999_999_999L);

View File

@ -103,7 +103,7 @@ public class FieldSortBuilderTests extends AbstractSortTestCase<FieldSortBuilder
}
}
if (randomBoolean()) {
builder.setNumericType(randomFrom(random(), "long", "double"));
builder.setNumericType(randomFrom(random(), "long", "double", "date", "date_nanos"));
}
return builder;
}
@ -140,7 +140,8 @@ public class FieldSortBuilderTests extends AbstractSortTestCase<FieldSortBuilder
mutated.order(randomValueOtherThan(original.order(), () -> randomFrom(SortOrder.values())));
break;
case 5:
mutated.setNumericType(randomValueOtherThan(original.getNumericType(), () -> randomFrom("LONG", "DOUBLE")));
mutated.setNumericType(randomValueOtherThan(original.getNumericType(),
() -> randomFrom("long", "double", "date", "date_nanos")));
break;
default:
throw new IllegalStateException("Unsupported mutation.");

View File

@ -1690,12 +1690,90 @@ public class FieldSortIT extends ESIntegTestCase {
}
}
/**
 * Sorts across one index mapping "field" as <code>date</code> and one mapping it as
 * <code>date_nanos</code>, using the <code>numeric_type</code> sort option to pick a single
 * resolution, and checks that the conversion to <code>date_nanos</code> is rejected for
 * dates before 1970 and after 2262.
 */
public void testCastDate() throws Exception {
assertAcked(prepareCreate("index_date")
.addMapping("_doc", "field", "type=date"));
assertAcked(prepareCreate("index_date_nanos")
.addMapping("_doc", "field", "type=date_nanos"));
ensureGreen("index_date", "index_date_nanos");
// one document per index; the date_nanos value is slightly earlier than the date value
List<IndexRequestBuilder> builders = new ArrayList<>();
builders.add(client().prepareIndex("index_date", "_doc")
.setSource("field", "2024-04-11T23:47:17"));
builders.add(client().prepareIndex("index_date_nanos", "_doc")
.setSource("field", "2024-04-11T23:47:16.854775807Z"));
indexRandom(true, true, builders);
// numeric_type=date: both sort values are expected as epoch milliseconds
{
SearchResponse response = client().prepareSearch()
.setQuery(matchAllQuery())
.setSize(builders.size())
.addSort(SortBuilders.fieldSort("field").setNumericType("date"))
.get();
SearchHits hits = response.getHits();
assertEquals(2, hits.getHits().length);
for (int i = 0; i < 2; i++) {
assertThat(hits.getAt(i).getSortValues()[0].getClass(), equalTo(Long.class));
}
assertEquals(1712879236854L, hits.getAt(0).getSortValues()[0]);
assertEquals(1712879237000L, hits.getAt(1).getSortValues()[0]);
}
// numeric_type=date_nanos: both sort values are expected as epoch nanoseconds
{
SearchResponse response = client().prepareSearch()
.setQuery(matchAllQuery())
.setSize(builders.size())
.addSort(SortBuilders.fieldSort("field").setNumericType("date_nanos"))
.get();
SearchHits hits = response.getHits();
assertEquals(2, hits.getHits().length);
for (int i = 0; i < 2; i++) {
assertThat(hits.getAt(i).getSortValues()[0].getClass(), equalTo(Long.class));
}
assertEquals(1712879236854775807L, hits.getAt(0).getSortValues()[0]);
assertEquals(1712879237000000000L, hits.getAt(1).getSortValues()[0]);
}
// a date before 1970 in the date index must make the date_nanos sort fail
{
builders.clear();
builders.add(client().prepareIndex("index_date", "_doc")
.setSource("field", "1905-04-11T23:47:17"));
indexRandom(true, true, builders);
// NOTE(review): partial results are disallowed presumably so that the shard-level
// conversion failure fails the whole request - confirm
SearchPhaseExecutionException exc = expectThrows(SearchPhaseExecutionException.class,
() -> client().prepareSearch()
.setQuery(matchAllQuery())
.setSize(builders.size())
.setAllowPartialSearchResults(false)
.addSort(SortBuilders.fieldSort("field").setNumericType("date_nanos"))
.get()
);
assertThat(exc.toString(), containsString("are before the epoch in 1970"));
}
// a date after 2262 in the date index must make the date_nanos sort fail
{
builders.clear();
builders.add(client().prepareIndex("index_date", "_doc")
.setSource("field", "2346-04-11T23:47:17"));
indexRandom(true, true, builders);
// the range query restricts the search to dates after 1970-01-01, which leaves out the
// 1905 document indexed above, so the expected failure is the "after 2262" one
SearchPhaseExecutionException exc = expectThrows(SearchPhaseExecutionException.class,
() -> client().prepareSearch()
.setQuery(QueryBuilders.rangeQuery("field").gt("1970-01-01"))
.setSize(builders.size())
.setAllowPartialSearchResults(false)
.addSort(SortBuilders.fieldSort("field").setNumericType("date_nanos"))
.get()
);
assertThat(exc.toString(), containsString("are after 2262"));
}
}
public void testCastNumericTypeExceptions() throws Exception {
assertAcked(prepareCreate("index")
.addMapping("_doc", "keyword", "type=keyword", "ip", "type=ip"));
ensureGreen("index");
for (String invalidField : new String[] {"keyword", "ip"}) {
for (String numericType : new String[]{"long", "double"}) {
for (String numericType : new String[]{"long", "double", "date", "date_nanos"}) {
ElasticsearchException exc = expectThrows(ElasticsearchException.class, () -> client().prepareSearch()
.setQuery(matchAllQuery())
.addSort(SortBuilders.fieldSort(invalidField).setNumericType(numericType))