mirror of
synced 2025-03-28 10:58:30 +00:00
Add `scaled_float`. #19264
This is an attempt to revive #15939 motivated by elastic/beats#1941. Half-floats are a pretty bad option for storing percentages. They would likely require 2 bytes all the time while they don't need more than one byte. So this PR exposes a new `scaled_float` type that requires a `scaling_factor` and internally indexes `value*scaling_factor` in a long field. Compared to the original PR it exposes a lower-level API so that the trade-offs are clearer and avoids any reference to fixed precision that might imply that this type is more accurate (actually it is *less* accurate). In addition to being more space-efficient for some use-cases that beats is interested in, this is also faster than `half_float` unless we can improve the efficiency of decoding half-float bits (which is currently done using software) or until Java gets first-class support for half-floats.
This commit is contained in:
@ -69,7 +69,7 @@ import java.util.Objects;
public class NumberFieldMapper extends FieldMapper implements AllFieldMapper.IncludeInAll {
public class NumberFieldMapper extends FieldMapper implements AllFieldMapper.IncludeInAll {
// this is private since it has a different default
// this is private since it has a different default
private static final Setting<Boolean> COERCE_SETTING =
static final Setting<Boolean> COERCE_SETTING =
Setting.boolSetting("index.mapping.coerce", true, Property.IndexScope);
Setting.boolSetting("index.mapping.coerce", true, Property.IndexScope);
public static class Builder extends FieldMapper.Builder<Builder, NumberFieldMapper> {
public static class Builder extends FieldMapper.Builder<Builder, NumberFieldMapper> {
@ -0,0 +1,616 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.index.mapper.core;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.action.fieldstats.FieldStats;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.NumericDoubleValues;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.core.LegacyNumberFieldMapper.Defaults;
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
import org.joda.time.DateTimeZone;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/** A {@link FieldMapper} for scaled floats. Values are internally multiplied
* by a scaling factor and rounded to the closest long. */
public class ScaledFloatFieldMapper extends FieldMapper implements AllFieldMapper.IncludeInAll {
public static final String CONTENT_TYPE = "scaled_float";
// use the same default as numbers
private static final Setting<Boolean> COERCE_SETTING = NumberFieldMapper.COERCE_SETTING;
public static class Builder extends FieldMapper.Builder<Builder, ScaledFloatFieldMapper> {
private boolean scalingFactorSet = false;
private Boolean ignoreMalformed;
private Boolean coerce;
public Builder(String name) {
super(name, new ScaledFloatFieldType(), new ScaledFloatFieldType());
builder = this;
public Builder ignoreMalformed(boolean ignoreMalformed) {
this.ignoreMalformed = ignoreMalformed;
return builder;
protected Explicit<Boolean> ignoreMalformed(BuilderContext context) {
if (ignoreMalformed != null) {
return new Explicit<>(ignoreMalformed, true);
if (context.indexSettings() != null) {
return new Explicit<>(IGNORE_MALFORMED_SETTING.get(context.indexSettings()), false);
return Defaults.IGNORE_MALFORMED;
public Builder coerce(boolean coerce) {
this.coerce = coerce;
return builder;
public Builder scalingFactor(double scalingFactor) {
((ScaledFloatFieldType) fieldType).setScalingFactor(scalingFactor);
scalingFactorSet = true;
return this;
protected Explicit<Boolean> coerce(BuilderContext context) {
if (coerce != null) {
return new Explicit<>(coerce, true);
if (context.indexSettings() != null) {
return new Explicit<>(COERCE_SETTING.get(context.indexSettings()), false);
return Defaults.COERCE;
public ScaledFloatFieldMapper build(BuilderContext context) {
if (scalingFactorSet == false) {
throw new IllegalArgumentException("Field [" + name + "] misses required parameter [scaling_factor]");
ScaledFloatFieldMapper fieldMapper =
new ScaledFloatFieldMapper(name, fieldType, defaultFieldType, ignoreMalformed(context),
coerce(context), context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
return (ScaledFloatFieldMapper) fieldMapper.includeInAll(includeInAll);
/** {@link Mapper.TypeParser} that turns a {@code scaled_float} mapping definition into a {@link Builder}. */
public static class TypeParser implements Mapper.TypeParser {
// Creates a Builder for the field, hands it with the mapping node to TypeParsers.parseField,
// then walks the node's entries to pick up the options handled here.
public Mapper.Builder<?,?> parse(String name, Map<String, Object> node,
ParserContext parserContext) throws MapperParsingException {
Builder builder = new Builder(name);
TypeParsers.parseField(builder, name, node, parserContext);
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
Map.Entry<String, Object> entry = iterator.next();
String propName = entry.getKey();
Object propNode = entry.getValue();
if (propName.equals("null_value")) {
// an explicit null is rejected for null_value
// NOTE(review): no statement applying a non-null null_value to the builder is visible here - confirm
if (propNode == null) {
throw new MapperParsingException("Property [null_value] cannot be null.");
} else if (propName.equals("ignore_malformed")) {
builder.ignoreMalformed(TypeParsers.nodeBooleanValue("ignore_malformed", propNode, parserContext));
} else if (propName.equals("coerce")) {
builder.coerce(TypeParsers.nodeBooleanValue("coerce", propNode, parserContext));
} else if (propName.equals("scaling_factor")) {
// NOTE(review): no call to builder.scalingFactor(...) is visible in this branch - confirm,
// since Builder.build rejects a field whose scaling factor was never set
return builder;
public static final class ScaledFloatFieldType extends MappedFieldType {
private double scalingFactor;
public ScaledFloatFieldType() {
ScaledFloatFieldType(ScaledFloatFieldType other) {
this.scalingFactor = other.scalingFactor;
public double getScalingFactor() {
return scalingFactor;
public void setScalingFactor(double scalingFactor) {
this.scalingFactor = scalingFactor;
public MappedFieldType clone() {
return new ScaledFloatFieldType(this);
public String typeName() {
public void checkCompatibility(MappedFieldType other, List<String> conflicts, boolean strict) {
super.checkCompatibility(other, conflicts, strict);
if (scalingFactor != ((ScaledFloatFieldType) other).getScalingFactor()) {
conflicts.add("mapper [" + name() + "] has different [scaling_factor] values");
public Query termQuery(Object value, QueryShardContext context) {
double queryValue = NumberFieldMapper.NumberType.DOUBLE.parse(value).doubleValue();
long scaledValue = Math.round(queryValue * scalingFactor);
Query query = NumberFieldMapper.NumberType.LONG.termQuery(name(), scaledValue);
if (boost() != 1f) {
query = new BoostQuery(query, boost());
return query;
public Query termsQuery(List values, QueryShardContext context) {
List<Long> scaledValues = new ArrayList<>(values.size());
for (Object value : values) {
double queryValue = NumberFieldMapper.NumberType.DOUBLE.parse(value).doubleValue();
long scaledValue = Math.round(queryValue * scalingFactor);
Query query = NumberFieldMapper.NumberType.LONG.termsQuery(name(), Collections.unmodifiableList(scaledValues));
if (boost() != 1f) {
query = new BoostQuery(query, boost());
return query;
public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper) {
Long lo = null;
if (lowerTerm != null) {
double dValue = NumberFieldMapper.NumberType.DOUBLE.parse(lowerTerm).doubleValue();
if (includeLower == false) {
dValue = Math.nextUp(dValue);
lo = Math.round(Math.ceil(dValue * scalingFactor));
Long hi = null;
if (lowerTerm != null) {
double dValue = NumberFieldMapper.NumberType.DOUBLE.parse(upperTerm).doubleValue();
if (includeUpper == false) {
dValue = Math.nextDown(dValue);
hi = Math.round(Math.floor(dValue * scalingFactor));
Query query = NumberFieldMapper.NumberType.LONG.rangeQuery(name(), lo, hi, true, true);
if (boost() != 1f) {
query = new BoostQuery(query, boost());
return query;
public FieldStats<?> stats(IndexReader reader) throws IOException {
FieldStats.Long stats = (FieldStats.Long) NumberFieldMapper.NumberType.LONG.stats(
reader, name(), isSearchable(), isAggregatable());
if (stats == null) {
return null;
return new FieldStats.Double(stats.getMaxDoc(), stats.getDocCount(),
stats.getSumDocFreq(), stats.getSumTotalTermFreq(),
stats.isSearchable(), stats.isAggregatable(),
stats.getMinValue() == null ? null : stats.getMinValue() / scalingFactor,
stats.getMaxValue() == null ? null : stats.getMaxValue() / scalingFactor);
public IndexFieldData.Builder fielddataBuilder() {
return new IndexFieldData.Builder() {
public IndexFieldData<?> build(IndexSettings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache,
CircuitBreakerService breakerService, MapperService mapperService) {
final IndexNumericFieldData scaledValues = (IndexNumericFieldData) new DocValuesIndexFieldData.Builder()
.build(indexSettings, fieldType, cache, breakerService, mapperService);
return new ScaledFloatIndexFieldData(scaledValues, scalingFactor);
public Object valueForSearch(Object value) {
if (value == null) {
return null;
return ((Number) value).longValue() / scalingFactor;
public DocValueFormat docValueFormat(String format, DateTimeZone timeZone) {
if (timeZone != null) {
throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName()
+ "] does not support custom time zones");
if (format == null) {
return DocValueFormat.RAW;
} else {
return new DocValueFormat.Decimal(format);
public boolean equals(Object o) {
if (super.equals(o) == false) {
return false;
return scalingFactor == ((ScaledFloatFieldType) o).scalingFactor;
public int hashCode() {
return 31 * super.hashCode() + Double.hashCode(scalingFactor);
private Boolean includeInAll;
private Explicit<Boolean> ignoreMalformed;
private Explicit<Boolean> coerce;
/**
 * Creates the mapper and validates the scaling factor of its field type.
 *
 * @throws IllegalArgumentException if the scaling factor is not a finite, strictly positive number
 */
private ScaledFloatFieldMapper(
        String simpleName,
        MappedFieldType fieldType,
        MappedFieldType defaultFieldType,
        Explicit<Boolean> ignoreMalformed,
        Explicit<Boolean> coerce,
        Settings indexSettings,
        MultiFields multiFields,
        CopyTo copyTo) {
    super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
    final double factor = fieldType().getScalingFactor();
    final boolean valid = Double.isFinite(factor) && factor > 0;
    if (!valid) {
        throw new IllegalArgumentException("[scaling_factor] must be a positive number, got [" + factor + "]");
    }
    this.ignoreMalformed = ignoreMalformed;
    this.coerce = coerce;
}
public ScaledFloatFieldType fieldType() {
return (ScaledFloatFieldType) super.fieldType();
protected String contentType() {
return fieldType.typeName();
protected ScaledFloatFieldMapper clone() {
return (ScaledFloatFieldMapper) super.clone();
public Mapper includeInAll(Boolean includeInAll) {
if (includeInAll != null) {
ScaledFloatFieldMapper clone = clone();
clone.includeInAll = includeInAll;
return clone;
} else {
return this;
public Mapper includeInAllIfNotSet(Boolean includeInAll) {
if (includeInAll != null && this.includeInAll == null) {
ScaledFloatFieldMapper clone = clone();
clone.includeInAll = includeInAll;
return clone;
} else {
return this;
public Mapper unsetIncludeInAll() {
if (includeInAll != null) {
ScaledFloatFieldMapper clone = clone();
clone.includeInAll = null;
return clone;
} else {
return this;
protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
XContentParser parser = context.parser();
Object value;
Number numericValue = null;
if (context.externalValueSet()) {
value = context.externalValue();
} else if (parser.currentToken() == Token.VALUE_NULL) {
value = null;
} else if (coerce.value()
&& parser.currentToken() == Token.VALUE_STRING
&& parser.textLength() == 0) {
value = null;
} else {
value = parser.textOrNull();
if (value != null) {
try {
numericValue = NumberFieldMapper.NumberType.DOUBLE.parse(parser, coerce.value());
} catch (IllegalArgumentException e) {
if (ignoreMalformed.value()) {
} else {
throw e;
if (value == null) {
value = fieldType().nullValue();
if (value == null) {
if (numericValue == null) {
numericValue = NumberFieldMapper.NumberType.DOUBLE.parse(value);
if (context.includeInAll(includeInAll, this)) {
context.allEntries().addText(fieldType().name(), value.toString(), fieldType().boost());
double doubleValue = numericValue.doubleValue();
if (Double.isFinite(doubleValue) == false) {
// since we encode to a long, we have no way to carry NaNs and infinities
throw new IllegalArgumentException("[scaled_float] only supports finite values, but got [" + doubleValue + "]");
long scaledValue = Math.round(doubleValue * fieldType().getScalingFactor());
boolean indexed = fieldType().indexOptions() != IndexOptions.NONE;
boolean docValued = fieldType().hasDocValues();
boolean stored = fieldType().stored();
fields.addAll(NumberFieldMapper.NumberType.LONG.createFields(fieldType().name(), scaledValue, indexed, docValued, stored));
protected void doMerge(Mapper mergeWith, boolean updateAllTypes) {
super.doMerge(mergeWith, updateAllTypes);
ScaledFloatFieldMapper other = (ScaledFloatFieldMapper) mergeWith;
this.includeInAll = other.includeInAll;
if (other.ignoreMalformed.explicit()) {
this.ignoreMalformed = other.ignoreMalformed;
if (other.coerce.explicit()) {
this.coerce = other.coerce;
protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
super.doXContentBody(builder, includeDefaults, params);
builder.field("scaling_factor", fieldType().getScalingFactor());
if (includeDefaults || ignoreMalformed.explicit()) {
builder.field("ignore_malformed", ignoreMalformed.value());
if (includeDefaults || coerce.explicit()) {
builder.field("coerce", coerce.value());
if (includeDefaults || fieldType().nullValue() != null) {
builder.field("null_value", fieldType().nullValue());
if (includeInAll != null) {
builder.field("include_in_all", includeInAll);
} else if (includeDefaults) {
builder.field("include_in_all", false);
private static class ScaledFloatIndexFieldData implements IndexNumericFieldData {
private final IndexNumericFieldData scaledFieldData;
private final double scalingFactor;
ScaledFloatIndexFieldData(IndexNumericFieldData scaledFieldData, double scalingFactor) {
this.scaledFieldData = scaledFieldData;
this.scalingFactor = scalingFactor;
public String getFieldName() {
return scaledFieldData.getFieldName();
public AtomicNumericFieldData load(LeafReaderContext context) {
return new ScaledFloatLeafFieldData(scaledFieldData.load(context), scalingFactor);
public AtomicNumericFieldData loadDirect(LeafReaderContext context) throws Exception {
return new ScaledFloatLeafFieldData(scaledFieldData.loadDirect(context), scalingFactor);
public org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue,
MultiValueMode sortMode, Nested nested) {
return new DoubleValuesComparatorSource(this, missingValue, sortMode, nested);
public void clear() {
public Index index() {
return scaledFieldData.index();
public NumericType getNumericType() {
return scaledFieldData.getNumericType();
private static class ScaledFloatLeafFieldData implements AtomicNumericFieldData {
private final AtomicNumericFieldData scaledFieldData;
private final double scalingFactorInverse;
ScaledFloatLeafFieldData(AtomicNumericFieldData scaledFieldData, double scalingFactor) {
this.scaledFieldData = scaledFieldData;
this.scalingFactorInverse = 1d / scalingFactor;
public ScriptDocValues.Doubles getScriptValues() {
return new ScriptDocValues.Doubles(getDoubleValues());
public SortedBinaryDocValues getBytesValues() {
return FieldData.toString(getDoubleValues());
public long ramBytesUsed() {
return scaledFieldData.ramBytesUsed();
public void close() {
public SortedNumericDocValues getLongValues() {
return FieldData.castToLong(getDoubleValues());
public SortedNumericDoubleValues getDoubleValues() {
final SortedNumericDocValues values = scaledFieldData.getLongValues();
final NumericDocValues singleValues = DocValues.unwrapSingleton(values);
if (singleValues != null) {
return FieldData.singleton(new NumericDoubleValues() {
public double get(int docID) {
return singleValues.get(docID) * scalingFactorInverse;
}, DocValues.unwrapSingletonBits(values));
} else {
return new SortedNumericDoubleValues() {
public double valueAt(int index) {
return values.valueAt(index) * scalingFactorInverse;
public void setDocument(int doc) {
public int count() {
return values.count();
@ -39,6 +39,7 @@ import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.index.mapper.core.TextFieldMapper;
import org.elasticsearch.index.mapper.core.TextFieldMapper;
import org.elasticsearch.index.mapper.core.TokenCountFieldMapper;
import org.elasticsearch.index.mapper.core.TokenCountFieldMapper;
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
import org.elasticsearch.index.mapper.core.ScaledFloatFieldMapper;
import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper;
import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper;
import org.elasticsearch.index.mapper.geo.GeoShapeFieldMapper;
import org.elasticsearch.index.mapper.geo.GeoShapeFieldMapper;
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
@ -105,6 +106,7 @@ public class IndicesModule extends AbstractModule {
mappers.put(BinaryFieldMapper.CONTENT_TYPE, new BinaryFieldMapper.TypeParser());
mappers.put(BinaryFieldMapper.CONTENT_TYPE, new BinaryFieldMapper.TypeParser());
mappers.put(DateFieldMapper.CONTENT_TYPE, new DateFieldMapper.TypeParser());
mappers.put(DateFieldMapper.CONTENT_TYPE, new DateFieldMapper.TypeParser());
mappers.put(IpFieldMapper.CONTENT_TYPE, new IpFieldMapper.TypeParser());
mappers.put(IpFieldMapper.CONTENT_TYPE, new IpFieldMapper.TypeParser());
mappers.put(ScaledFloatFieldMapper.CONTENT_TYPE, new ScaledFloatFieldMapper.TypeParser());
mappers.put(StringFieldMapper.CONTENT_TYPE, new StringFieldMapper.TypeParser());
mappers.put(StringFieldMapper.CONTENT_TYPE, new StringFieldMapper.TypeParser());
mappers.put(TextFieldMapper.CONTENT_TYPE, new TextFieldMapper.TypeParser());
mappers.put(TextFieldMapper.CONTENT_TYPE, new TextFieldMapper.TypeParser());
mappers.put(KeywordFieldMapper.CONTENT_TYPE, new KeywordFieldMapper.TypeParser());
mappers.put(KeywordFieldMapper.CONTENT_TYPE, new KeywordFieldMapper.TypeParser());
Normal file
Normal file
@ -0,0 +1,339 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.index.mapper.core;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.junit.Before;
import java.io.IOException;
import static org.hamcrest.Matchers.containsString;
public class ScaledFloatFieldMapperTests extends ESSingleNodeTestCase {
IndexService indexService;
DocumentMapperParser parser;
public void before() {
indexService = createIndex("test");
parser = indexService.mapperService().documentMapperParser();
public void testDefaults() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "scaled_float")
.field("scaling_factor", 10.0).endObject().endObject()
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.field("field", 123)
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(2, fields.length);
IndexableField pointField = fields[0];
assertEquals(1, pointField.fieldType().pointDimensionCount());
assertEquals(1230, pointField.numericValue().longValue());
IndexableField dvField = fields[1];
assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
assertEquals(1230, dvField.numericValue().longValue());
public void testMissingScalingFactor() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "scaled_float").endObject().endObject()
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> parser.parse("type", new CompressedXContent(mapping)));
assertEquals("Field [field] misses required parameter [scaling_factor]", e.getMessage());
public void testIllegalScalingFactor() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "scaled_float")
.field("scaling_factor", -1).endObject().endObject()
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> parser.parse("type", new CompressedXContent(mapping)));
assertEquals("[scaling_factor] must be a positive number, got [-1.0]", e.getMessage());
public void testNotIndexed() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "scaled_float")
.field("index", false).field("scaling_factor", 10.0).endObject().endObject()
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.field("field", 123)
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(1, fields.length);
IndexableField dvField = fields[0];
assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
assertEquals(1230, dvField.numericValue().longValue());
public void testNoDocValues() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "scaled_float")
.field("doc_values", false).field("scaling_factor", 10.0).endObject().endObject()
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.field("field", 123)
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(1, fields.length);
IndexableField pointField = fields[0];
assertEquals(1, pointField.fieldType().pointDimensionCount());
assertEquals(1230, pointField.numericValue().longValue());
public void testStore() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "scaled_float")
.field("store", true).field("scaling_factor", 10.0).endObject().endObject()
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.field("field", 123)
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(3, fields.length);
IndexableField pointField = fields[0];
assertEquals(1, pointField.fieldType().pointDimensionCount());
assertEquals(1230, pointField.numericValue().doubleValue(), 0d);
IndexableField dvField = fields[1];
assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
IndexableField storedField = fields[2];
assertEquals(1230, storedField.numericValue().longValue());
public void testCoerce() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "scaled_float")
.field("scaling_factor", 10.0).endObject().endObject()
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.field("field", "123")
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(2, fields.length);
IndexableField pointField = fields[0];
assertEquals(1, pointField.fieldType().pointDimensionCount());
assertEquals(1230, pointField.numericValue().longValue());
IndexableField dvField = fields[1];
assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "scaled_float")
.field("scaling_factor", 10.0).field("coerce", false).endObject().endObject()
DocumentMapper mapper2 = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper2.mappingSource().toString());
ThrowingRunnable runnable = () -> mapper2.parse("test", "type", "1", XContentFactory.jsonBuilder()
.field("field", "123")
MapperParsingException e = expectThrows(MapperParsingException.class, runnable);
assertThat(e.getCause().getMessage(), containsString("passed as String"));
public void testIgnoreMalformed() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "scaled_float")
.field("scaling_factor", 10.0).endObject().endObject()
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
ThrowingRunnable runnable = () -> mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.field("field", "a")
MapperParsingException e = expectThrows(MapperParsingException.class, runnable);
assertThat(e.getCause().getMessage(), containsString("For input string: \"a\""));
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "scaled_float")
.field("scaling_factor", 10.0).field("ignore_malformed", true).endObject().endObject()
DocumentMapper mapper2 = parser.parse("type", new CompressedXContent(mapping));
ParsedDocument doc = mapper2.parse("test", "type", "1", XContentFactory.jsonBuilder()
.field("field", "a")
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(0, fields.length);
public void testIncludeInAll() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", "scaled_float")
.field("scaling_factor", 10.0).endObject().endObject()
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.field("field", 123)
IndexableField[] fields = doc.rootDoc().getFields("_all");
assertEquals(1, fields.length);
assertEquals("123", fields[0].stringValue());
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.field("type", "scaled_float").field("scaling_factor", 10.0)
.field("include_in_all", false).endObject().endObject()
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
.field("field", 123)
fields = doc.rootDoc().getFields("_all");
assertEquals(0, fields.length);
public void testNullValue() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject()
.field("type", "scaled_float")
.field("scaling_factor", 10.0)
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field"));
mapping = XContentFactory.jsonBuilder().startObject()
.field("type", "scaled_float")
.field("scaling_factor", 10.0)
.field("null_value", 2.5)
mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(2, fields.length);
IndexableField pointField = fields[0];
assertEquals(1, pointField.fieldType().pointDimensionCount());
assertEquals(25, pointField.numericValue().longValue());
IndexableField dvField = fields[1];
assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
Normal file
Normal file
@ -0,0 +1,205 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.index.mapper.core;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.Version;
import org.elasticsearch.action.fieldstats.FieldStats;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.index.mapper.FieldTypeTestCase;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.junit.Before;
import java.io.IOException;
import java.util.Arrays;
public class ScaledFloatFieldTypeTests extends FieldTypeTestCase {
// Supplies the field type instance exercised by the FieldTypeTestCase base class:
// a ScaledFloatFieldType created through its no-arg constructor.
// NOTE(review): any further configuration of ft is not visible in this extract.
protected MappedFieldType createDefaultFieldType() {
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
return ft;
// Registers a Modifier for the "scaling_factor" property of the field type.
// NOTE(review): the meaning of the boolean constructor argument (false) is defined by
// Modifier, which is not visible here -- confirm before relying on it.
public void setupProperties() {
addModifier(new Modifier("scaling_factor", false) {
// Receives the field type to alter; it is cast to ScaledFloatFieldType here.
// NOTE(review): the statement that actually changes tft is missing from this extract.
public void modify(MappedFieldType ft) {
ScaledFloatFieldMapper.ScaledFloatFieldType tft = (ScaledFloatFieldMapper.ScaledFloatFieldType)ft;
// Sets the scaling factor of the other field type to 100.
public void normalizeOther(MappedFieldType other) {
((ScaledFloatFieldMapper.ScaledFloatFieldType) other).setScalingFactor(100);
// Verifies that termQuery on a double value yields an exact LongPoint query on the
// "scaled_float" field for round(value * scalingFactor).
public void testTermQuery() {
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
// Random scaling factor; the 0.1 offset keeps it strictly positive.
ft.setScalingFactor(0.1 + randomDouble() * 100);
// Random value; (randomDouble() * 2 - 1) * 10000 covers both negative and positive inputs.
double value = (randomDouble() * 2 - 1) * 10000;
// Expected encoding: scale, then round to the nearest long.
long scaledValue = Math.round(value * ft.getScalingFactor());
assertEquals(LongPoint.newExactQuery("scaled_float", scaledValue), ft.termQuery(value, null));
// Verifies that termsQuery on several double values corresponds to a LongPoint set
// query over the scaled, rounded longs of those values.
public void testTermsQuery() {
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
// Random scaling factor; the 0.1 offset keeps it strictly positive.
ft.setScalingFactor(0.1 + randomDouble() * 100);
// Two independent random values, each encoded as round(value * scalingFactor).
double value1 = (randomDouble() * 2 - 1) * 10000;
long scaledValue1 = Math.round(value1 * ft.getScalingFactor());
double value2 = (randomDouble() * 2 - 1) * 10000;
long scaledValue2 = Math.round(value2 * ft.getScalingFactor());
// Expected query first, query produced by the field type second.
LongPoint.newSetQuery("scaled_float", scaledValue1, scaledValue2),
ft.termsQuery(Arrays.asList(value1, value2), null));
// Compares range-query hit counts between a scaled_float field and a double field
// that holds the same values after rounding to the scaled-float grid.
public void testRangeQuery() throws IOException {
// make sure the accuracy loss of scaled floats only occurs at index time
// this test checks that searching scaled floats yields the same results as
// searching doubles that are rounded to the closest scaled float value
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
ft.setScalingFactor(0.1 + randomDouble() * 100);
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
final int numDocs = 1000;
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
double value = (randomDouble() * 2 - 1) * 10000;
// The long that the scaled_float field indexes for this value.
long scaledValue = Math.round(value * ft.getScalingFactor());
// The double that the scaled long decodes back to; indexed in the reference field.
double rounded = scaledValue / ft.getScalingFactor();
doc.add(new LongPoint("scaled_float", scaledValue));
doc.add(new DoublePoint("double", rounded));
final DirectoryReader reader = DirectoryReader.open(w);
IndexSearcher searcher = newSearcher(reader);
final int numQueries = 1000;
for (int i = 0; i < numQueries; ++i) {
// Random bounds and random inclusiveness on both ends.
double l = (randomDouble() * 2 - 1) * 10000;
double u = (randomDouble() * 2 - 1) * 10000;
boolean includeLower = randomBoolean();
boolean includeUpper = randomBoolean();
Query doubleQ = NumberFieldMapper.NumberType.DOUBLE.rangeQuery("double", l, u, includeLower, includeUpper);
Query scaledFloatQ = ft.rangeQuery(l, u, includeLower, includeUpper);
// Both queries must match the same number of documents.
assertEquals(searcher.count(doubleQ), searcher.count(scaledFloatQ));
IOUtils.close(reader, dir);
// Verifies that valueForSearch converts a raw long back to a double by dividing it
// by the scaling factor.
public void testValueForSearch() {
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
ft.setScalingFactor(0.1 + randomDouble() * 100);
// Raw long 10 is expected to come back as 10 / scalingFactor.
assertEquals(10/ft.getScalingFactor(), ft.valueForSearch(10L));
// Checks the field statistics reported by the field type at several reader snapshots.
// NOTE(review): the statements that add documents to the writer and the assertions of
// the first and last reader blocks are missing from this extract.
public void testStats() throws IOException {
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
ft.setScalingFactor(0.1 + randomDouble() * 100);
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
try (DirectoryReader reader = DirectoryReader.open(w)) {
Document doc = new Document();
// Point carrying the raw (already scaled) long -1.
LongPoint point = new LongPoint("scaled_float", -1);
try (DirectoryReader reader = DirectoryReader.open(w)) {
FieldStats<?> stats = ft.stats(reader);
// Min and max are expected as raw long / scalingFactor, not as the raw longs.
assertEquals(-1/ft.getScalingFactor(), stats.getMinValue());
// presumably a second point with raw value 10 was indexed -- not visible here
assertEquals(10/ft.getScalingFactor(), stats.getMaxValue());
assertEquals(2, stats.getMaxDoc());
try (DirectoryReader reader = DirectoryReader.open(w)) {
IOUtils.close(w, dir);
// Verifies that field data built from this field type exposes doc values as doubles
// equal to the raw long divided by the scaling factor, for single- and multi-valued fields.
// NOTE(review): the lines selecting which field name ft reads are not visible in this extract.
public void testFieldData() throws IOException {
ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
ft.setScalingFactor(0.1 + randomDouble() * 100);
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
Document doc = new Document();
// "scaled_float1" gets one raw value (10); "scaled_float2" gets two (5 and 12).
doc.add(new SortedNumericDocValuesField("scaled_float1", 10));
doc.add(new SortedNumericDocValuesField("scaled_float2", 5));
doc.add(new SortedNumericDocValuesField("scaled_float2", 12));
try (DirectoryReader reader = DirectoryReader.open(w)) {
// Index settings handed to the field data builder: current version, 1 shard, 0 replicas.
IndexMetaData indexMetadata = new IndexMetaData.Builder("index").settings(
.put("index.version.created", Version.CURRENT)
.put("index.number_of_shards", 1)
.put("index.number_of_replicas", 0).build()).build();
IndexSettings indexSettings = new IndexSettings(indexMetadata, Settings.EMPTY);
// single-valued
IndexNumericFieldData fielddata = (IndexNumericFieldData) ft.fielddataBuilder().build(indexSettings, ft, null, null, null);
AtomicNumericFieldData leafFieldData = fielddata.load(reader.leaves().get(0));
SortedNumericDoubleValues values = leafFieldData.getDoubleValues();
assertEquals(1, values.count());
// Compared with a 10e-5 tolerance since the expected value comes from a double division.
assertEquals(10/ft.getScalingFactor(), values.valueAt(0), 10e-5);
// multi-valued
fielddata = (IndexNumericFieldData) ft.fielddataBuilder().build(indexSettings, ft, null, null, null);
leafFieldData = fielddata.load(reader.leaves().get(0));
values = leafFieldData.getDoubleValues();
assertEquals(2, values.count());
// Values are expected in ascending order: 5 before 12.
assertEquals(5/ft.getScalingFactor(), values.valueAt(0), 10e-5);
assertEquals(12/ft.getScalingFactor(), values.valueAt(1), 10e-5);
IOUtils.close(w, dir);
@ -152,8 +152,9 @@ space. They can be compressed more aggressively by using the `best_compression`
=== Use the smallest numeric type that is sufficient
=== Use the smallest numeric type that is sufficient
When storing <<number,numeric data>>, using `float` over `double`, or `half_float`
The type that you pick for <<number,numeric data>> can have a significant impact
over `float` can help save storage. This is also true for integer types, but less
on disk usage. In particular, integers should be stored using an integer type
since Elasticsearch will more easily compress them based on the number of bits
(`byte`, `short`, `integer` or `long`) and floating points should either be
that they actually need.
stored in a `scaled_float` if appropriate or in the smallest type that fits the
use-case: using `float` over `double`, or `half_float` over `float` will help
save storage.
@ -4,13 +4,14 @@
The following numeric types are supported:
The following numeric types are supported:
`long`:: A signed 64-bit integer with a minimum value of +-2^63^+ and a maximum value of +2^63^-1+.
`long`:: A signed 64-bit integer with a minimum value of +-2^63^+ and a maximum value of +2^63^-1+.
`integer`:: A signed 32-bit integer with a minimum value of +-2^31^+ and a maximum value of +2^31^-1+.
`integer`:: A signed 32-bit integer with a minimum value of +-2^31^+ and a maximum value of +2^31^-1+.
`short`:: A signed 16-bit integer with a minimum value of +-32,768+ and a maximum value of +32,767+.
`short`:: A signed 16-bit integer with a minimum value of +-32,768+ and a maximum value of +32,767+.
`byte`:: A signed 8-bit integer with a minimum value of +-128+ and a maximum value of +127+.
`byte`:: A signed 8-bit integer with a minimum value of +-128+ and a maximum value of +127+.
`double`:: A double-precision 64-bit IEEE 754 floating point.
`double`:: A double-precision 64-bit IEEE 754 floating point.
`float`:: A single-precision 32-bit IEEE 754 floating point.
`float`:: A single-precision 32-bit IEEE 754 floating point.
`half_float`:: A half-precision 16-bit IEEE 754 floating point.
`half_float`:: A half-precision 16-bit IEEE 754 floating point.
`scaled_float`:: A floating point that is backed by a `long` and a fixed scaling factor.
Below is an example of configuring a mapping with numeric fields:
Below is an example of configuring a mapping with numeric fields:
@ -26,6 +27,10 @@ PUT my_index
"time_in_seconds": {
"time_in_seconds": {
"type": "float"
"type": "float"
"price": {
"type": "scaled_float",
"scaling_factor": 100
@ -42,11 +47,24 @@ help indexing and searching be more efficient. Note however that given that
storage is optimized based on the actual values that are stored, picking one
storage is optimized based on the actual values that are stored, picking one
type over another one will have no impact on storage requirements.
type over another one will have no impact on storage requirements.
For floating-point types, picking the smallest type that is enough for the
For floating-point types, it is often more efficient to store floating-point
use-case will still help indexing and searching be more efficient. However,
data into an integer using a scaling factor, which is what the `scaled_float`
given that floating-point data is hard to compress, it might also have a
type does under the hood. For instance, a `price` field could be stored in a
significant impact on storage requirements. Here is a table that compares the
`scaled_float` with a `scaling_factor` of +100+. All APIs would work as if
3 floating-point types that are available in order to help make a decision.
the field was stored as a double, but under the hood Elasticsearch would be
working with the number of cents, +price*100+, which is an integer. This is
mostly helpful to save disk space since integers are way easier to compress
than floating points. `scaled_float` is also fine to use in order to trade
accuracy for disk space. For instance, imagine that you are tracking CPU
utilization as a number between +0+ and +1+. It usually does not matter much
whether CPU utilization is +12.7%+ or +13%+, so you could use a `scaled_float`
with a `scaling_factor` of +100+ to round CPU utilization to the closest
percent and save space.
If `scaled_float` is not a good fit, then you should pick the smallest type
that is enough for the use-case among the floating-point types: `double`,
`float` and `half_float`. Here is a table that compares these types in order
to help make a decision.
@ -56,12 +74,6 @@ significant impact on storage requirements. Here is a table that compares the
|`half_float`|+2^-24^+ |+65504+ |+11+ / +3.31+
|`half_float`|+2^-24^+ |+65504+ |+11+ / +3.31+
When possible, it is often more efficient to store floating-point data into an
integer using a scaling factor. For instance, it is more efficient to store
percentages as integers between 0 and 100 than as floating-point numbers between 0
and 1. Another example would be prices: it will be more efficient to store prices
as a number of cents, which is an integer, than as a floating-point number.
==== Parameters for numeric fields
==== Parameters for numeric fields
@ -114,4 +126,19 @@ The following parameters are accepted by numeric types:
the <<mapping-source-field,`_source`>> field. Accepts `true` or `false`
the <<mapping-source-field,`_source`>> field. Accepts `true` or `false`
==== Parameters for `scaled_float`
`scaled_float` accepts an additional parameter:
`scaling_factor`::
The scaling factor to use when encoding values. Values will be multiplied
by this factor at index time and rounded to the closest long value. For
instance, a `scaled_float` with a `scaling_factor` of +10+ would internally
store +2.34+ as +23+ and all search-time operations (queries, aggregations,
sorting) will behave as if the document had a value of +2.3+. High values
of `scaling_factor` improve accuracy but also increase space requirements.
This parameter is required.
Reference in New Issue
Block a user