Add an option to force the numeric type of a field sort (#38095) (#40084)

This change adds an option to the `FieldSortBuilder` that allows transforming the type
of a numeric field into another. Possible values for this option are `long`, which transforms
the source field into an integer, and `double`, which transforms the source field into a floating point.
This new option is useful for cross-index search when the sort field is mapped differently on some
indices. For instance if a field is mapped as a floating point in one index and as an integer in another
it is possible to align the type for both indices using the `numeric_type` option:

```
{
   "sort": {
    "my_field": {
      "numeric_type": "double" <1>
    }
   }
}
```

<1> Ensure that values for this field are transformed to a floating point if needed.
This commit is contained in:
Jim Ferenczi 2019-03-18 09:32:45 +01:00 committed by GitHub
parent 95024798c0
commit 5b73a1bc7d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 221 additions and 10 deletions

View File

@ -108,6 +108,73 @@ POST /_search
--------------------------------------------------
// CONSOLE
==== Sorting numeric fields
For numeric fields it is also possible to cast the values from one type
to another using the `numeric_type` option.
This option accepts the following values: [`"double"`, `"long"`] and can be useful
for cross-index search if the sort field is mapped differently on some
indices.
Consider for instance these two indices:
[source,js]
--------------------------------------------------
PUT /index_double
{
"mappings": {
"properties": {
"field": { "type": "double" }
}
}
}
--------------------------------------------------
// CONSOLE
[source,js]
--------------------------------------------------
PUT /index_long
{
"mappings": {
"properties": {
"field": { "type": "long" }
}
}
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
Since `field` is mapped as a `double` in the first index and as a `long`
in the second index, it is not possible to use this field to sort requests
that query both indices by default. However, you can use the `numeric_type`
option to force a specific type for all indices:
[source,js]
--------------------------------------------------
POST /index_long,index_double/_search
{
"sort" : [
{
"field" : {
"numeric_type" : "double"
}
}
]
}
--------------------------------------------------
// CONSOLE
// TEST[continued]
In the example above, values for the `index_long` index are cast to
a double in order to be compatible with the values produced by the
`index_double` index.
It is also possible to transform a floating point field into a `long`,
but note that in this case floating point values are truncated toward zero:
a positive value is replaced by the largest integer that is less than or
equal to it, and a negative value by the smallest integer that is greater
than or equal to it.
[[nested-sorting]]
==== Sorting within nested objects.

View File

@ -64,10 +64,15 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
this.numericType = numericType;
}
@Override
public SortField sortField(Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
/**
* Returns the {@link SortField} to use for sorting.
* Values are cast to the provided <code>targetNumericType</code> type if it doesn't
* match the field's <code>numericType</code>.
*/
public SortField sortField(NumericType targetNumericType, Object missingValue, MultiValueMode sortMode, Nested nested,
boolean reverse) {
final XFieldComparatorSource source;
switch (numericType) {
switch (targetNumericType) {
case HALF_FLOAT:
case FLOAT:
source = new FloatValuesComparatorSource(this, missingValue, sortMode, nested);
@ -78,7 +83,7 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
break;
default:
assert !numericType.isFloatingPoint();
assert !targetNumericType.isFloatingPoint();
source = new LongValuesComparatorSource(this, missingValue, sortMode, nested);
break;
}
@ -88,8 +93,9 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
* returns a custom sort field otherwise.
*/
if (nested != null
|| (sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN)
|| numericType == NumericType.HALF_FLOAT) {
|| (sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN)
|| numericType == NumericType.HALF_FLOAT
|| targetNumericType != numericType) {
return new SortField(fieldName, source, reverse);
}
@ -114,6 +120,11 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
return sortField;
}
/**
 * Builds the {@link SortField} for this field without requesting a cast: the field's own
 * <code>numericType</code> is passed as the target numeric type.
 */
@Override
public SortField sortField(Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
    // Target type == own type, so the type-aware overload sees no mismatch.
    final NumericType ownType = this.numericType;
    return sortField(ownType, missingValue, sortMode, nested, reverse);
}
@Override
public NumericType getNumericType() {
return numericType;

View File

@ -33,6 +33,8 @@ import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
import org.elasticsearch.index.fielddata.plain.SortedNumericDVIndexFieldData;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryRewriteContext;
@ -42,6 +44,7 @@ import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
import java.io.IOException;
import java.util.Locale;
import java.util.Objects;
import static org.elasticsearch.search.sort.NestedSortBuilder.NESTED_FIELD;
@ -56,6 +59,7 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
public static final ParseField MISSING = new ParseField("missing");
public static final ParseField SORT_MODE = new ParseField("mode");
public static final ParseField UNMAPPED_TYPE = new ParseField("unmapped_type");
public static final ParseField NUMERIC_TYPE = new ParseField("numeric_type");
/**
* special field name to sort by index order
@ -72,6 +76,8 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
private String unmappedType;
private String numericType;
private SortMode sortMode;
private QueryBuilder nestedFilter;
@ -94,6 +100,7 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
if (template.getNestedSort() != null) {
this.setNestedSort(template.getNestedSort());
}
this.numericType = template.numericType;
}
/**
@ -123,6 +130,9 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
if (in.getVersion().onOrAfter(Version.V_6_1_0)) {
nestedSort = in.readOptionalWriteable(NestedSortBuilder::new);
}
if (in.getVersion().onOrAfter(Version.V_7_1_0)) {
numericType = in.readOptionalString();
}
}
@Override
@ -137,6 +147,9 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
if (out.getVersion().onOrAfter(Version.V_6_1_0)) {
out.writeOptionalWriteable(nestedSort);
}
if (out.getVersion().onOrAfter(Version.V_7_1_0)) {
out.writeOptionalString(numericType);
}
}
/** Returns the document field this sort should be based on. */
@ -274,6 +287,36 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
return this;
}
/**
 * Returns the numeric type that sort values should be translated to, or
 * <code>null</code> if the original numeric type should be preserved.
 */
public String getNumericType() {
    return this.numericType;
}
/**
 * Forces the numeric type to use when sorting on the field. The query will fail if this
 * option is set on a field that is not mapped as a numeric in some indices.
 * Specifying a numeric type tells Elasticsearch what type the sort values should
 * have, which is important for cross-index search if a field does not have
 * the same type on all indices.
 * Allowed values are <code>long</code> and <code>double</code>; the value is matched
 * case-insensitively and stored in upper case.
 *
 * @param numericType the requested target type
 * @return this builder
 * @throws IllegalArgumentException if the value is neither <code>long</code> nor <code>double</code>
 */
public FieldSortBuilder setNumericType(String numericType) {
    final String normalized = numericType.toUpperCase(Locale.ENGLISH);
    final boolean supported = "LONG".equals(normalized) || "DOUBLE".equals(normalized);
    if (supported == false) {
        throw new IllegalArgumentException("invalid value for [numeric_type], must be [LONG, DOUBLE], got " + numericType);
    }
    this.numericType = normalized;
    return this;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
@ -297,6 +340,9 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
if (nestedSort != null) {
builder.field(NESTED_FIELD.getPreferredName(), nestedSort);
}
if (numericType != null) {
builder.field(NUMERIC_TYPE.getPreferredName(), numericType);
}
builder.endObject();
builder.endObject();
return builder;
@ -351,7 +397,18 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
&& (sortMode == SortMode.SUM || sortMode == SortMode.AVG || sortMode == SortMode.MEDIAN)) {
throw new QueryShardException(context, "we only support AVG, MEDIAN and SUM on number based fields");
}
SortField field = fieldData.sortField(missing, localSortMode, nested, reverse);
final SortField field;
if (numericType != null) {
if (fieldData instanceof IndexNumericFieldData == false) {
throw new QueryShardException(context,
"[numeric_type] option cannot be set on a non-numeric field, got " + fieldType.typeName());
}
SortedNumericDVIndexFieldData numericFieldData = (SortedNumericDVIndexFieldData) fieldData;
NumericType resolvedType = NumericType.valueOf(numericType);
field = numericFieldData.sortField(resolvedType, missing, localSortMode, nested, reverse);
} else {
field = fieldData.sortField(missing, localSortMode, nested, reverse);
}
return new SortFieldAndFormat(field, fieldType.docValueFormat(null, null));
}
}
@ -370,13 +427,14 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
return (Objects.equals(this.fieldName, builder.fieldName) && Objects.equals(this.nestedFilter, builder.nestedFilter)
&& Objects.equals(this.nestedPath, builder.nestedPath) && Objects.equals(this.missing, builder.missing)
&& Objects.equals(this.order, builder.order) && Objects.equals(this.sortMode, builder.sortMode)
&& Objects.equals(this.unmappedType, builder.unmappedType) && Objects.equals(this.nestedSort, builder.nestedSort));
&& Objects.equals(this.unmappedType, builder.unmappedType) && Objects.equals(this.nestedSort, builder.nestedSort))
&& Objects.equals(this.numericType, builder.numericType);
}
@Override
public int hashCode() {
return Objects.hash(this.fieldName, this.nestedFilter, this.nestedPath, this.nestedSort, this.missing, this.order, this.sortMode,
this.unmappedType);
this.unmappedType, this.numericType);
}
@Override
@ -413,6 +471,7 @@ public class FieldSortBuilder extends SortBuilder<FieldSortBuilder> {
return SortBuilder.parseNestedFilter(p);
}, NESTED_FILTER_FIELD);
PARSER.declareObject(FieldSortBuilder::setNestedSort, (p, c) -> NestedSortBuilder.fromXContent(p), NESTED_FIELD);
PARSER.declareString((b, v) -> b.setNumericType(v), NUMERIC_TYPE);
}
@Override

View File

@ -102,13 +102,16 @@ public class FieldSortBuilderTests extends AbstractSortTestCase<FieldSortBuilder
}
}
}
if (randomBoolean()) {
builder.setNumericType(randomFrom(random(), "long", "double"));
}
return builder;
}
@Override
protected FieldSortBuilder mutate(FieldSortBuilder original) throws IOException {
FieldSortBuilder mutated = new FieldSortBuilder(original);
int parameter = randomIntBetween(0, 4);
int parameter = randomIntBetween(0, 5);
switch (parameter) {
case 0:
if (original.getNestedPath() == null && original.getNestedFilter() == null) {
@ -136,6 +139,9 @@ public class FieldSortBuilderTests extends AbstractSortTestCase<FieldSortBuilder
case 4:
mutated.order(randomValueOtherThan(original.order(), () -> randomFrom(SortOrder.values())));
break;
case 5:
mutated.setNumericType(randomValueOtherThan(original.getNumericType(), () -> randomFrom("LONG", "DOUBLE")));
break;
default:
throw new IllegalStateException("Unsupported mutation.");
}

View File

@ -22,6 +22,7 @@ package org.elasticsearch.search.sort;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.admin.indices.alias.Alias;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
@ -36,6 +37,7 @@ import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.script.MockScriptPlugin;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
@ -1638,4 +1640,70 @@ public class FieldSortIT extends ESIntegTestCase {
assertEquals(100.2, hits.getAt(1).getSortValues()[0]);
assertEquals(120.3, hits.getAt(2).getSortValues()[0]);
}
/**
 * Creates a double, a long and a float index, indexes one document into each, and checks
 * that sorting on <code>field</code> with a forced <code>numeric_type</code> returns sort
 * values of the requested type for every hit.
 */
public void testCastNumericType() throws Exception {
    assertAcked(prepareCreate("index_double").addMapping("_doc", "field", "type=double"));
    assertAcked(prepareCreate("index_long").addMapping("_doc", "field", "type=long"));
    assertAcked(prepareCreate("index_float").addMapping("_doc", "field", "type=float"));
    ensureGreen("index_double", "index_long", "index_float");

    List<IndexRequestBuilder> docs = new ArrayList<>();
    docs.add(client().prepareIndex("index_double", "_doc").setSource("field", 12.6));
    docs.add(client().prepareIndex("index_long", "_doc").setSource("field", 12));
    docs.add(client().prepareIndex("index_float", "_doc").setSource("field", 12.1));
    indexRandom(true, true, docs);

    {
        // Forced to long: every hit is expected to carry the sort value 12 as a Long.
        SearchResponse longSorted = client().prepareSearch()
            .setQuery(matchAllQuery())
            .setSize(docs.size())
            .addSort(SortBuilders.fieldSort("field").setNumericType("long"))
            .get();
        SearchHits longHits = longSorted.getHits();
        assertEquals(3, longHits.getHits().length);
        for (int pos = 0; pos < 3; pos++) {
            Object sortValue = longHits.getAt(pos).getSortValues()[0];
            assertThat(sortValue.getClass(), equalTo(Long.class));
            assertEquals(12L, sortValue);
        }
    }

    {
        // Forced to double: hits are expected in ascending order 12, ~12.1, 12.6 as Doubles.
        SearchResponse doubleSorted = client().prepareSearch()
            .setQuery(matchAllQuery())
            .setSize(docs.size())
            .addSort(SortBuilders.fieldSort("field").setNumericType("double"))
            .get();
        SearchHits doubleHits = doubleSorted.getHits();
        assertEquals(3, doubleHits.getHits().length);
        for (int pos = 0; pos < 3; pos++) {
            assertThat(doubleHits.getAt(pos).getSortValues()[0].getClass(), equalTo(Double.class));
        }
        Object first = doubleHits.getAt(0).getSortValues()[0];
        Object second = doubleHits.getAt(1).getSortValues()[0];
        Object third = doubleHits.getAt(2).getSortValues()[0];
        assertEquals(12D, first);
        // Compared with a tolerance; presumably the value from the float index is not exactly 12.1 as a double.
        assertEquals(12.1D, (double) second, 0.001f);
        assertEquals(12.6D, third);
    }
}
/**
 * Checks that setting <code>numeric_type</code> on a sort over a keyword or ip field is
 * rejected with a bad request and an explanatory message.
 */
public void testCastNumericTypeExceptions() throws Exception {
    assertAcked(prepareCreate("index").addMapping("_doc", "keyword", "type=keyword", "ip", "type=ip"));
    ensureGreen("index");

    final String[] nonNumericFields = {"keyword", "ip"};
    final String[] targetTypes = {"long", "double"};
    for (String fieldName : nonNumericFields) {
        for (String targetType : targetTypes) {
            ElasticsearchException failure = expectThrows(ElasticsearchException.class,
                () -> client().prepareSearch()
                    .setQuery(matchAllQuery())
                    .addSort(SortBuilders.fieldSort(fieldName).setNumericType(targetType))
                    .get());
            assertThat(failure.status(), equalTo(RestStatus.BAD_REQUEST));
            assertThat(failure.getDetailedMessage(), containsString("[numeric_type] option cannot be set on a non-numeric field"));
        }
    }
}
}