Add scaled_float. #19264

This is an attempt to revive #15939, motivated by elastic/beats#1941. Half-floats are a pretty bad option for storing percentages: they would likely require 2 bytes all the time, while they don't need more than one byte. So this PR exposes a new `scaled_float` type that requires a `scaling_factor` and internally indexes `value*scaling_factor` in a long field. Compared to the original PR it exposes a lower-level API so that the trade-offs are clearer, and it avoids any reference to fixed precision that might imply this type is more accurate (it is actually *less* accurate). In addition to being more space-efficient for some use-cases that Beats is interested in, this is also faster than `half_float` unless we can improve the efficiency of decoding half-float bits (which is currently done in software) or until Java gets first-class support for half-floats.
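For illustration, a minimal standalone sketch of the encoding idea (the class and variable names are hypothetical, not code from this commit): values are scaled and rounded to a long at index time, and divided back at search time, so the precision loss happens exactly once, at index time.

```java
// Sketch of the scaled_float encoding idea; names are illustrative only.
public class ScaledFloatSketch {
    public static void main(String[] args) {
        double scalingFactor = 100.0;  // two decimal digits survive the round-trip
        double value = 0.1234;         // e.g. a percentage expressed as a ratio

        long encoded = Math.round(value * scalingFactor);  // 12, stored as a long
        double decoded = encoded / scalingFactor;          // 0.12, what searches see

        System.out.println(value + " -> " + encoded + " -> " + decoded);
    }
}
```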
This commit is contained in:
parent 59cda81b40
commit 398d70b567
@@ -69,7 +69,7 @@ import java.util.Objects;
 public class NumberFieldMapper extends FieldMapper implements AllFieldMapper.IncludeInAll {

     // this is private since it has a different default
-    private static final Setting<Boolean> COERCE_SETTING =
+    static final Setting<Boolean> COERCE_SETTING =
         Setting.boolSetting("index.mapping.coerce", true, Property.IndexScope);

     public static class Builder extends FieldMapper.Builder<Builder, NumberFieldMapper> {
@@ -0,0 +1,616 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.mapper.core;

import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.action.fieldstats.FieldStats;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.Token;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.NumericDoubleValues;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.core.LegacyNumberFieldMapper.Defaults;
import org.elasticsearch.index.mapper.internal.AllFieldMapper;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
import org.joda.time.DateTimeZone;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/** A {@link FieldMapper} for scaled floats. Values are internally multiplied
 *  by a scaling factor and rounded to the closest long. */
public class ScaledFloatFieldMapper extends FieldMapper implements AllFieldMapper.IncludeInAll {

    public static final String CONTENT_TYPE = "scaled_float";
    // use the same default as numbers
    private static final Setting<Boolean> COERCE_SETTING = NumberFieldMapper.COERCE_SETTING;

    public static class Builder extends FieldMapper.Builder<Builder, ScaledFloatFieldMapper> {

        private boolean scalingFactorSet = false;
        private Boolean ignoreMalformed;
        private Boolean coerce;

        public Builder(String name) {
            super(name, new ScaledFloatFieldType(), new ScaledFloatFieldType());
            builder = this;
        }

        public Builder ignoreMalformed(boolean ignoreMalformed) {
            this.ignoreMalformed = ignoreMalformed;
            return builder;
        }

        protected Explicit<Boolean> ignoreMalformed(BuilderContext context) {
            if (ignoreMalformed != null) {
                return new Explicit<>(ignoreMalformed, true);
            }
            if (context.indexSettings() != null) {
                return new Explicit<>(IGNORE_MALFORMED_SETTING.get(context.indexSettings()), false);
            }
            return Defaults.IGNORE_MALFORMED;
        }

        public Builder coerce(boolean coerce) {
            this.coerce = coerce;
            return builder;
        }

        public Builder scalingFactor(double scalingFactor) {
            ((ScaledFloatFieldType) fieldType).setScalingFactor(scalingFactor);
            scalingFactorSet = true;
            return this;
        }

        protected Explicit<Boolean> coerce(BuilderContext context) {
            if (coerce != null) {
                return new Explicit<>(coerce, true);
            }
            if (context.indexSettings() != null) {
                return new Explicit<>(COERCE_SETTING.get(context.indexSettings()), false);
            }
            return Defaults.COERCE;
        }

        @Override
        public ScaledFloatFieldMapper build(BuilderContext context) {
            if (scalingFactorSet == false) {
                throw new IllegalArgumentException("Field [" + name + "] misses required parameter [scaling_factor]");
            }
            setupFieldType(context);
            ScaledFloatFieldMapper fieldMapper =
                new ScaledFloatFieldMapper(name, fieldType, defaultFieldType, ignoreMalformed(context),
                    coerce(context), context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
            return (ScaledFloatFieldMapper) fieldMapper.includeInAll(includeInAll);
        }
    }

    public static class TypeParser implements Mapper.TypeParser {

        @Override
        public Mapper.Builder<?,?> parse(String name, Map<String, Object> node,
                                         ParserContext parserContext) throws MapperParsingException {
            Builder builder = new Builder(name);
            TypeParsers.parseField(builder, name, node, parserContext);
            for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext();) {
                Map.Entry<String, Object> entry = iterator.next();
                String propName = entry.getKey();
                Object propNode = entry.getValue();
                if (propName.equals("null_value")) {
                    if (propNode == null) {
                        throw new MapperParsingException("Property [null_value] cannot be null.");
                    }
                    builder.nullValue(NumberFieldMapper.NumberType.DOUBLE.parse(propNode));
                    iterator.remove();
                } else if (propName.equals("ignore_malformed")) {
                    builder.ignoreMalformed(TypeParsers.nodeBooleanValue("ignore_malformed", propNode, parserContext));
                    iterator.remove();
                } else if (propName.equals("coerce")) {
                    builder.coerce(TypeParsers.nodeBooleanValue("coerce", propNode, parserContext));
                    iterator.remove();
                } else if (propName.equals("scaling_factor")) {
                    builder.scalingFactor(NumberFieldMapper.NumberType.DOUBLE.parse(propNode).doubleValue());
                    iterator.remove();
                }
            }
            return builder;
        }
    }

    public static final class ScaledFloatFieldType extends MappedFieldType {

        private double scalingFactor;

        public ScaledFloatFieldType() {
            super();
            setTokenized(false);
            setHasDocValues(true);
            setOmitNorms(true);
        }

        ScaledFloatFieldType(ScaledFloatFieldType other) {
            super(other);
            this.scalingFactor = other.scalingFactor;
        }

        public double getScalingFactor() {
            return scalingFactor;
        }

        public void setScalingFactor(double scalingFactor) {
            checkIfFrozen();
            this.scalingFactor = scalingFactor;
        }

        @Override
        public MappedFieldType clone() {
            return new ScaledFloatFieldType(this);
        }

        @Override
        public String typeName() {
            return CONTENT_TYPE;
        }

        @Override
        public void checkCompatibility(MappedFieldType other, List<String> conflicts, boolean strict) {
            super.checkCompatibility(other, conflicts, strict);
            if (scalingFactor != ((ScaledFloatFieldType) other).getScalingFactor()) {
                conflicts.add("mapper [" + name() + "] has different [scaling_factor] values");
            }
        }

        @Override
        public Query termQuery(Object value, QueryShardContext context) {
            failIfNotIndexed();
            double queryValue = NumberFieldMapper.NumberType.DOUBLE.parse(value).doubleValue();
            long scaledValue = Math.round(queryValue * scalingFactor);
            Query query = NumberFieldMapper.NumberType.LONG.termQuery(name(), scaledValue);
            if (boost() != 1f) {
                query = new BoostQuery(query, boost());
            }
            return query;
        }

        @Override
        public Query termsQuery(List values, QueryShardContext context) {
            failIfNotIndexed();
            List<Long> scaledValues = new ArrayList<>(values.size());
            for (Object value : values) {
                double queryValue = NumberFieldMapper.NumberType.DOUBLE.parse(value).doubleValue();
                long scaledValue = Math.round(queryValue * scalingFactor);
                scaledValues.add(scaledValue);
            }
            Query query = NumberFieldMapper.NumberType.LONG.termsQuery(name(), Collections.unmodifiableList(scaledValues));
            if (boost() != 1f) {
                query = new BoostQuery(query, boost());
            }
            return query;
        }

        @Override
        public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper) {
            failIfNotIndexed();
            Long lo = null;
            if (lowerTerm != null) {
                double dValue = NumberFieldMapper.NumberType.DOUBLE.parse(lowerTerm).doubleValue();
                if (includeLower == false) {
                    dValue = Math.nextUp(dValue);
                }
                lo = Math.round(Math.ceil(dValue * scalingFactor));
            }
            Long hi = null;
            if (upperTerm != null) {
                double dValue = NumberFieldMapper.NumberType.DOUBLE.parse(upperTerm).doubleValue();
                if (includeUpper == false) {
                    dValue = Math.nextDown(dValue);
                }
                hi = Math.round(Math.floor(dValue * scalingFactor));
            }
            Query query = NumberFieldMapper.NumberType.LONG.rangeQuery(name(), lo, hi, true, true);
            if (boost() != 1f) {
                query = new BoostQuery(query, boost());
            }
            return query;
        }

        @Override
        public FieldStats<?> stats(IndexReader reader) throws IOException {
            FieldStats.Long stats = (FieldStats.Long) NumberFieldMapper.NumberType.LONG.stats(
                    reader, name(), isSearchable(), isAggregatable());
            if (stats == null) {
                return null;
            }
            return new FieldStats.Double(stats.getMaxDoc(), stats.getDocCount(),
                    stats.getSumDocFreq(), stats.getSumTotalTermFreq(),
                    stats.isSearchable(), stats.isAggregatable(),
                    stats.getMinValue() == null ? null : stats.getMinValue() / scalingFactor,
                    stats.getMaxValue() == null ? null : stats.getMaxValue() / scalingFactor);
        }

        @Override
        public IndexFieldData.Builder fielddataBuilder() {
            failIfNoDocValues();
            return new IndexFieldData.Builder() {
                @Override
                public IndexFieldData<?> build(IndexSettings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache,
                                               CircuitBreakerService breakerService, MapperService mapperService) {
                    final IndexNumericFieldData scaledValues = (IndexNumericFieldData) new DocValuesIndexFieldData.Builder()
                            .numericType(IndexNumericFieldData.NumericType.LONG)
                            .build(indexSettings, fieldType, cache, breakerService, mapperService);
                    return new ScaledFloatIndexFieldData(scaledValues, scalingFactor);
                }
            };
        }

        @Override
        public Object valueForSearch(Object value) {
            if (value == null) {
                return null;
            }
            return ((Number) value).longValue() / scalingFactor;
        }

        @Override
        public DocValueFormat docValueFormat(String format, DateTimeZone timeZone) {
            if (timeZone != null) {
                throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName()
                        + "] does not support custom time zones");
            }
            if (format == null) {
                return DocValueFormat.RAW;
            } else {
                return new DocValueFormat.Decimal(format);
            }
        }

        @Override
        public boolean equals(Object o) {
            if (super.equals(o) == false) {
                return false;
            }
            return scalingFactor == ((ScaledFloatFieldType) o).scalingFactor;
        }

        @Override
        public int hashCode() {
            return 31 * super.hashCode() + Double.hashCode(scalingFactor);
        }
    }

    private Boolean includeInAll;

    private Explicit<Boolean> ignoreMalformed;

    private Explicit<Boolean> coerce;

    private ScaledFloatFieldMapper(
            String simpleName,
            MappedFieldType fieldType,
            MappedFieldType defaultFieldType,
            Explicit<Boolean> ignoreMalformed,
            Explicit<Boolean> coerce,
            Settings indexSettings,
            MultiFields multiFields,
            CopyTo copyTo) {
        super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
        final double scalingFactor = fieldType().getScalingFactor();
        if (Double.isFinite(scalingFactor) == false || scalingFactor <= 0) {
            throw new IllegalArgumentException("[scaling_factor] must be a positive number, got [" + scalingFactor + "]");
        }
        this.ignoreMalformed = ignoreMalformed;
        this.coerce = coerce;
    }

    @Override
    public ScaledFloatFieldType fieldType() {
        return (ScaledFloatFieldType) super.fieldType();
    }

    @Override
    protected String contentType() {
        return fieldType.typeName();
    }

    @Override
    protected ScaledFloatFieldMapper clone() {
        return (ScaledFloatFieldMapper) super.clone();
    }

    @Override
    public Mapper includeInAll(Boolean includeInAll) {
        if (includeInAll != null) {
            ScaledFloatFieldMapper clone = clone();
            clone.includeInAll = includeInAll;
            return clone;
        } else {
            return this;
        }
    }

    @Override
    public Mapper includeInAllIfNotSet(Boolean includeInAll) {
        if (includeInAll != null && this.includeInAll == null) {
            ScaledFloatFieldMapper clone = clone();
            clone.includeInAll = includeInAll;
            return clone;
        } else {
            return this;
        }
    }

    @Override
    public Mapper unsetIncludeInAll() {
        if (includeInAll != null) {
            ScaledFloatFieldMapper clone = clone();
            clone.includeInAll = null;
            return clone;
        } else {
            return this;
        }
    }

    @Override
    protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
        XContentParser parser = context.parser();
        Object value;
        Number numericValue = null;
        if (context.externalValueSet()) {
            value = context.externalValue();
        } else if (parser.currentToken() == Token.VALUE_NULL) {
            value = null;
        } else if (coerce.value()
                && parser.currentToken() == Token.VALUE_STRING
                && parser.textLength() == 0) {
            value = null;
        } else {
            value = parser.textOrNull();
            if (value != null) {
                try {
                    numericValue = NumberFieldMapper.NumberType.DOUBLE.parse(parser, coerce.value());
                } catch (IllegalArgumentException e) {
                    if (ignoreMalformed.value()) {
                        return;
                    } else {
                        throw e;
                    }
                }
            }
        }

        if (value == null) {
            value = fieldType().nullValue();
        }

        if (value == null) {
            return;
        }

        if (numericValue == null) {
            numericValue = NumberFieldMapper.NumberType.DOUBLE.parse(value);
        }

        if (context.includeInAll(includeInAll, this)) {
            context.allEntries().addText(fieldType().name(), value.toString(), fieldType().boost());
        }

        double doubleValue = numericValue.doubleValue();
        if (Double.isFinite(doubleValue) == false) {
            // since we encode to a long, we have no way to carry NaNs and infinities
            throw new IllegalArgumentException("[scaled_float] only supports finite values, but got [" + doubleValue + "]");
        }
        long scaledValue = Math.round(doubleValue * fieldType().getScalingFactor());

        boolean indexed = fieldType().indexOptions() != IndexOptions.NONE;
        boolean docValued = fieldType().hasDocValues();
        boolean stored = fieldType().stored();
        fields.addAll(NumberFieldMapper.NumberType.LONG.createFields(fieldType().name(), scaledValue, indexed, docValued, stored));
    }

    @Override
    protected void doMerge(Mapper mergeWith, boolean updateAllTypes) {
        super.doMerge(mergeWith, updateAllTypes);
        ScaledFloatFieldMapper other = (ScaledFloatFieldMapper) mergeWith;
        this.includeInAll = other.includeInAll;
        if (other.ignoreMalformed.explicit()) {
            this.ignoreMalformed = other.ignoreMalformed;
        }
        if (other.coerce.explicit()) {
            this.coerce = other.coerce;
        }
    }

    @Override
    protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
        super.doXContentBody(builder, includeDefaults, params);

        builder.field("scaling_factor", fieldType().getScalingFactor());

        if (includeDefaults || ignoreMalformed.explicit()) {
            builder.field("ignore_malformed", ignoreMalformed.value());
        }
        if (includeDefaults || coerce.explicit()) {
            builder.field("coerce", coerce.value());
        }

        if (includeDefaults || fieldType().nullValue() != null) {
            builder.field("null_value", fieldType().nullValue());
        }

        if (includeInAll != null) {
            builder.field("include_in_all", includeInAll);
        } else if (includeDefaults) {
            builder.field("include_in_all", false);
        }
    }

    private static class ScaledFloatIndexFieldData implements IndexNumericFieldData {

        private final IndexNumericFieldData scaledFieldData;
        private final double scalingFactor;

        ScaledFloatIndexFieldData(IndexNumericFieldData scaledFieldData, double scalingFactor) {
            this.scaledFieldData = scaledFieldData;
            this.scalingFactor = scalingFactor;
        }

        @Override
        public String getFieldName() {
            return scaledFieldData.getFieldName();
        }

        @Override
        public AtomicNumericFieldData load(LeafReaderContext context) {
            return new ScaledFloatLeafFieldData(scaledFieldData.load(context), scalingFactor);
        }

        @Override
        public AtomicNumericFieldData loadDirect(LeafReaderContext context) throws Exception {
            return new ScaledFloatLeafFieldData(scaledFieldData.loadDirect(context), scalingFactor);
        }

        @Override
        public org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue,
                MultiValueMode sortMode, Nested nested) {
            return new DoubleValuesComparatorSource(this, missingValue, sortMode, nested);
        }

        @Override
        public void clear() {
            scaledFieldData.clear();
        }

        @Override
        public Index index() {
            return scaledFieldData.index();
        }

        @Override
        public NumericType getNumericType() {
            return scaledFieldData.getNumericType();
        }

    }

    private static class ScaledFloatLeafFieldData implements AtomicNumericFieldData {

        private final AtomicNumericFieldData scaledFieldData;
        private final double scalingFactorInverse;

        ScaledFloatLeafFieldData(AtomicNumericFieldData scaledFieldData, double scalingFactor) {
            this.scaledFieldData = scaledFieldData;
            this.scalingFactorInverse = 1d / scalingFactor;
        }

        @Override
        public ScriptDocValues.Doubles getScriptValues() {
            return new ScriptDocValues.Doubles(getDoubleValues());
        }

        @Override
        public SortedBinaryDocValues getBytesValues() {
            return FieldData.toString(getDoubleValues());
        }

        @Override
        public long ramBytesUsed() {
            return scaledFieldData.ramBytesUsed();
        }

        @Override
        public void close() {
            scaledFieldData.close();
        }

        @Override
        public SortedNumericDocValues getLongValues() {
            return FieldData.castToLong(getDoubleValues());
        }

        @Override
        public SortedNumericDoubleValues getDoubleValues() {
            final SortedNumericDocValues values = scaledFieldData.getLongValues();
            final NumericDocValues singleValues = DocValues.unwrapSingleton(values);
            if (singleValues != null) {
                return FieldData.singleton(new NumericDoubleValues() {
                    @Override
                    public double get(int docID) {
                        return singleValues.get(docID) * scalingFactorInverse;
                    }
                }, DocValues.unwrapSingletonBits(values));
            } else {
                return new SortedNumericDoubleValues() {

                    @Override
                    public double valueAt(int index) {
                        return values.valueAt(index) * scalingFactorInverse;
                    }

                    @Override
                    public void setDocument(int doc) {
                        values.setDocument(doc);
                    }

                    @Override
                    public int count() {
                        return values.count();
                    }
                };
            }
        }

    }
}
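The bound arithmetic in `rangeQuery` above is the subtlest part of this class. A standalone sketch (hypothetical class and method names, but the same math as the method) of how exclusive double bounds become inclusive scaled-long bounds via `nextUp`/`nextDown` plus `ceil`/`floor`:

```java
// Sketch of the rangeQuery bound math: exclusive double bounds are turned
// into inclusive scaled-long bounds. Names here are illustrative only.
public class ScaledRangeSketch {
    static long lowerBound(double lower, boolean includeLower, double factor) {
        if (includeLower == false) {
            lower = Math.nextUp(lower);        // smallest double strictly above the bound
        }
        return Math.round(Math.ceil(lower * factor));   // smallest scaled long >= bound
    }

    static long upperBound(double upper, boolean includeUpper, double factor) {
        if (includeUpper == false) {
            upper = Math.nextDown(upper);      // largest double strictly below the bound
        }
        return Math.round(Math.floor(upper * factor));  // largest scaled long <= bound
    }

    public static void main(String[] args) {
        // (0.12, 0.34] with a scaling factor of 100 becomes the long range [13, 34]
        System.out.println(lowerBound(0.12, false, 100));  // 13
        System.out.println(upperBound(0.34, true, 100));   // 34
    }
}
```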
@@ -39,6 +39,7 @@ import org.elasticsearch.index.mapper.core.StringFieldMapper;
 import org.elasticsearch.index.mapper.core.TextFieldMapper;
 import org.elasticsearch.index.mapper.core.TokenCountFieldMapper;
 import org.elasticsearch.index.mapper.core.NumberFieldMapper;
+import org.elasticsearch.index.mapper.core.ScaledFloatFieldMapper;
 import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper;
 import org.elasticsearch.index.mapper.geo.GeoShapeFieldMapper;
 import org.elasticsearch.index.mapper.internal.AllFieldMapper;
@@ -105,6 +106,7 @@ public class IndicesModule extends AbstractModule {
         mappers.put(BinaryFieldMapper.CONTENT_TYPE, new BinaryFieldMapper.TypeParser());
         mappers.put(DateFieldMapper.CONTENT_TYPE, new DateFieldMapper.TypeParser());
         mappers.put(IpFieldMapper.CONTENT_TYPE, new IpFieldMapper.TypeParser());
+        mappers.put(ScaledFloatFieldMapper.CONTENT_TYPE, new ScaledFloatFieldMapper.TypeParser());
         mappers.put(StringFieldMapper.CONTENT_TYPE, new StringFieldMapper.TypeParser());
         mappers.put(TextFieldMapper.CONTENT_TYPE, new TextFieldMapper.TypeParser());
         mappers.put(KeywordFieldMapper.CONTENT_TYPE, new KeywordFieldMapper.TypeParser());
@@ -0,0 +1,339 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.mapper.core;

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.junit.Before;

import java.io.IOException;

import static org.hamcrest.Matchers.containsString;

public class ScaledFloatFieldMapperTests extends ESSingleNodeTestCase {

    IndexService indexService;
    DocumentMapperParser parser;

    @Before
    public void before() {
        indexService = createIndex("test");
        parser = indexService.mapperService().documentMapperParser();
    }

    public void testDefaults() throws Exception {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", "scaled_float")
                .field("scaling_factor", 10.0).endObject().endObject()
                .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .field("field", 123)
                .endObject()
                .bytes());

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(2, fields.length);
        IndexableField pointField = fields[0];
        assertEquals(1, pointField.fieldType().pointDimensionCount());
        assertFalse(pointField.fieldType().stored());
        assertEquals(1230, pointField.numericValue().longValue());
        IndexableField dvField = fields[1];
        assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
        assertEquals(1230, dvField.numericValue().longValue());
        assertFalse(dvField.fieldType().stored());
    }

    public void testMissingScalingFactor() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", "scaled_float").endObject().endObject()
                .endObject().endObject().string();

        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
                () -> parser.parse("type", new CompressedXContent(mapping)));
        assertEquals("Field [field] misses required parameter [scaling_factor]", e.getMessage());
    }

    public void testIllegalScalingFactor() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", "scaled_float")
                .field("scaling_factor", -1).endObject().endObject()
                .endObject().endObject().string();

        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
                () -> parser.parse("type", new CompressedXContent(mapping)));
        assertEquals("[scaling_factor] must be a positive number, got [-1.0]", e.getMessage());
    }

    public void testNotIndexed() throws Exception {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", "scaled_float")
                .field("index", false).field("scaling_factor", 10.0).endObject().endObject()
                .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .field("field", 123)
                .endObject()
                .bytes());

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(1, fields.length);
        IndexableField dvField = fields[0];
        assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
        assertEquals(1230, dvField.numericValue().longValue());
    }

    public void testNoDocValues() throws Exception {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", "scaled_float")
                .field("doc_values", false).field("scaling_factor", 10.0).endObject().endObject()
                .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .field("field", 123)
                .endObject()
                .bytes());

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(1, fields.length);
        IndexableField pointField = fields[0];
        assertEquals(1, pointField.fieldType().pointDimensionCount());
        assertEquals(1230, pointField.numericValue().longValue());
    }

    public void testStore() throws Exception {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", "scaled_float")
                .field("store", true).field("scaling_factor", 10.0).endObject().endObject()
                .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .field("field", 123)
                .endObject()
                .bytes());

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(3, fields.length);
        IndexableField pointField = fields[0];
        assertEquals(1, pointField.fieldType().pointDimensionCount());
        assertEquals(1230, pointField.numericValue().doubleValue(), 0d);
        IndexableField dvField = fields[1];
        assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
        IndexableField storedField = fields[2];
        assertTrue(storedField.fieldType().stored());
        assertEquals(1230, storedField.numericValue().longValue());
    }

    public void testCoerce() throws Exception {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", "scaled_float")
                .field("scaling_factor", 10.0).endObject().endObject()
                .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .field("field", "123")
                .endObject()
                .bytes());

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(2, fields.length);
        IndexableField pointField = fields[0];
        assertEquals(1, pointField.fieldType().pointDimensionCount());
        assertEquals(1230, pointField.numericValue().longValue());
        IndexableField dvField = fields[1];
        assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());

        mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", "scaled_float")
                .field("scaling_factor", 10.0).field("coerce", false).endObject().endObject()
                .endObject().endObject().string();

        DocumentMapper mapper2 = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper2.mappingSource().toString());

        ThrowingRunnable runnable = () -> mapper2.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .field("field", "123")
                .endObject()
                .bytes());
        MapperParsingException e = expectThrows(MapperParsingException.class, runnable);
        assertThat(e.getCause().getMessage(), containsString("passed as String"));
    }

    public void testIgnoreMalformed() throws Exception {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", "scaled_float")
                .field("scaling_factor", 10.0).endObject().endObject()
                .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ThrowingRunnable runnable = () -> mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .field("field", "a")
                .endObject()
                .bytes());
        MapperParsingException e = expectThrows(MapperParsingException.class, runnable);
        assertThat(e.getCause().getMessage(), containsString("For input string: \"a\""));

        mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", "scaled_float")
                .field("scaling_factor", 10.0).field("ignore_malformed", true).endObject().endObject()
                .endObject().endObject().string();

        DocumentMapper mapper2 = parser.parse("type", new CompressedXContent(mapping));

        ParsedDocument doc = mapper2.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .field("field", "a")
                .endObject()
                .bytes());

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(0, fields.length);
    }

    public void testIncludeInAll() throws Exception {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", "scaled_float")
                .field("scaling_factor", 10.0).endObject().endObject()
                .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .field("field", 123)
                .endObject()
                .bytes());

        IndexableField[] fields = doc.rootDoc().getFields("_all");
        assertEquals(1, fields.length);
        assertEquals("123", fields[0].stringValue());

        mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "scaled_float").field("scaling_factor", 10.0)
                .field("include_in_all", false).endObject().endObject()
                .endObject().endObject().string();

        mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .field("field", 123)
                .endObject()
                .bytes());

        fields = doc.rootDoc().getFields("_all");
        assertEquals(0, fields.length);
    }

    public void testNullValue() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject()
                .startObject("type")
                    .startObject("properties")
                        .startObject("field")
                            .field("type", "scaled_float")
                            .field("scaling_factor", 10.0)
                        .endObject()
                    .endObject()
                .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .nullField("field")
                .endObject()
                .bytes());
        assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field"));

        mapping = XContentFactory.jsonBuilder().startObject()
                .startObject("type")
                    .startObject("properties")
                        .startObject("field")
                            .field("type", "scaled_float")
                            .field("scaling_factor", 10.0)
                            .field("null_value", 2.5)
                        .endObject()
                    .endObject()
                .endObject().endObject().string();

        mapper = parser.parse("type", new CompressedXContent(mapping));
        assertEquals(mapping, mapper.mappingSource().toString());

        doc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder()
                .startObject()
                .nullField("field")
                .endObject()
                .bytes());
        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(2, fields.length);
        IndexableField pointField = fields[0];
        assertEquals(1, pointField.fieldType().pointDimensionCount());
        assertFalse(pointField.fieldType().stored());
        assertEquals(25, pointField.numericValue().longValue());
        IndexableField dvField = fields[1];
        assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
        assertFalse(dvField.fieldType().stored());
    }
}
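For reference, the assertions in `testDefaults` above correspond to the two Lucene fields that `NumberType.LONG.createFields` produces for an indexed, doc-valued field. A pure-Lucene sketch (illustrative field name, assuming the Lucene 6.x-era APIs this commit builds against) of what gets written for the value `123` with a `scaling_factor` of `10`:

```java
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;

// Sketch: the two Lucene fields behind an indexed, doc-valued scaled_float.
public class ScaledFloatFieldsSketch {
    public static void main(String[] args) {
        double scalingFactor = 10.0;
        long scaled = Math.round(123 * scalingFactor);  // 1230, as asserted in testDefaults

        Document doc = new Document();
        doc.add(new LongPoint("field", scaled));                    // points, for queries
        doc.add(new SortedNumericDocValuesField("field", scaled));  // doc values, for sort/aggs

        System.out.println(doc.getFields().size());  // 2
    }
}
```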
@@ -0,0 +1,205 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.mapper.core;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.Version;
import org.elasticsearch.action.fieldstats.FieldStats;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.index.mapper.FieldTypeTestCase;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.junit.Before;

import java.io.IOException;
import java.util.Arrays;

public class ScaledFloatFieldTypeTests extends FieldTypeTestCase {

    @Override
    protected MappedFieldType createDefaultFieldType() {
        ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
        ft.setScalingFactor(100);
        return ft;
    }

    @Before
    public void setupProperties() {
        addModifier(new Modifier("scaling_factor", false) {
            @Override
            public void modify(MappedFieldType ft) {
                ScaledFloatFieldMapper.ScaledFloatFieldType tft = (ScaledFloatFieldMapper.ScaledFloatFieldType) ft;
                tft.setScalingFactor(10);
            }
            @Override
            public void normalizeOther(MappedFieldType other) {
                super.normalizeOther(other);
                ((ScaledFloatFieldMapper.ScaledFloatFieldType) other).setScalingFactor(100);
            }
        });
    }

    public void testTermQuery() {
        ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
        ft.setName("scaled_float");
        ft.setScalingFactor(0.1 + randomDouble() * 100);
        double value = (randomDouble() * 2 - 1) * 10000;
        long scaledValue = Math.round(value * ft.getScalingFactor());
        assertEquals(LongPoint.newExactQuery("scaled_float", scaledValue), ft.termQuery(value, null));
    }

    public void testTermsQuery() {
        ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
        ft.setName("scaled_float");
        ft.setScalingFactor(0.1 + randomDouble() * 100);
        double value1 = (randomDouble() * 2 - 1) * 10000;
        long scaledValue1 = Math.round(value1 * ft.getScalingFactor());
        double value2 = (randomDouble() * 2 - 1) * 10000;
        long scaledValue2 = Math.round(value2 * ft.getScalingFactor());
        assertEquals(
                LongPoint.newSetQuery("scaled_float", scaledValue1, scaledValue2),
                ft.termsQuery(Arrays.asList(value1, value2), null));
    }

    public void testRangeQuery() throws IOException {
        // make sure the accuracy loss of scaled floats only occurs at index time
        // this test checks that searching scaled floats yields the same results as
        // searching doubles that are rounded to the closest scaled value
        ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
        ft.setName("scaled_float");
        ft.setScalingFactor(0.1 + randomDouble() * 100);
        Directory dir = newDirectory();
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
        final int numDocs = 1000;
        for (int i = 0; i < numDocs; ++i) {
            Document doc = new Document();
            double value = (randomDouble() * 2 - 1) * 10000;
            long scaledValue = Math.round(value * ft.getScalingFactor());
            double rounded = scaledValue / ft.getScalingFactor();
            doc.add(new LongPoint("scaled_float", scaledValue));
            doc.add(new DoublePoint("double", rounded));
            w.addDocument(doc);
        }
        final DirectoryReader reader = DirectoryReader.open(w);
        w.close();
        IndexSearcher searcher = newSearcher(reader);
        final int numQueries = 1000;
        for (int i = 0; i < numQueries; ++i) {
            double l = (randomDouble() * 2 - 1) * 10000;
            double u = (randomDouble() * 2 - 1) * 10000;
            boolean includeLower = randomBoolean();
            boolean includeUpper = randomBoolean();
            Query doubleQ = NumberFieldMapper.NumberType.DOUBLE.rangeQuery("double", l, u, includeLower, includeUpper);
            Query scaledFloatQ = ft.rangeQuery(l, u, includeLower, includeUpper);
            assertEquals(searcher.count(doubleQ), searcher.count(scaledFloatQ));
        }
        IOUtils.close(reader, dir);
    }

    public void testValueForSearch() {
        ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
        ft.setName("scaled_float");
        ft.setScalingFactor(0.1 + randomDouble() * 100);
        assertNull(ft.valueForSearch(null));
        assertEquals(10/ft.getScalingFactor(), ft.valueForSearch(10L));
    }

    public void testStats() throws IOException {
        ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
        ft.setName("scaled_float");
        ft.setScalingFactor(0.1 + randomDouble() * 100);
        Directory dir = newDirectory();
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
        try (DirectoryReader reader = DirectoryReader.open(w)) {
            assertNull(ft.stats(reader));
        }
        Document doc = new Document();
        LongPoint point = new LongPoint("scaled_float", -1);
        doc.add(point);
        w.addDocument(doc);
        point.setLongValue(10);
        w.addDocument(doc);
        try (DirectoryReader reader = DirectoryReader.open(w)) {
            FieldStats<?> stats = ft.stats(reader);
            assertEquals(-1/ft.getScalingFactor(), stats.getMinValue());
            assertEquals(10/ft.getScalingFactor(), stats.getMaxValue());
            assertEquals(2, stats.getMaxDoc());
        }
        w.deleteAll();
        try (DirectoryReader reader = DirectoryReader.open(w)) {
            assertNull(ft.stats(reader));
        }
        IOUtils.close(w, dir);
    }

    public void testFieldData() throws IOException {
        ScaledFloatFieldMapper.ScaledFloatFieldType ft = new ScaledFloatFieldMapper.ScaledFloatFieldType();
        ft.setScalingFactor(0.1 + randomDouble() * 100);
        Directory dir = newDirectory();
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
        Document doc = new Document();
        doc.add(new SortedNumericDocValuesField("scaled_float1", 10));
        doc.add(new SortedNumericDocValuesField("scaled_float2", 5));
        doc.add(new SortedNumericDocValuesField("scaled_float2", 12));
        w.addDocument(doc);
        try (DirectoryReader reader = DirectoryReader.open(w)) {
            IndexMetaData indexMetadata = new IndexMetaData.Builder("index").settings(
                    Settings.builder()
                    .put("index.version.created", Version.CURRENT)
                    .put("index.number_of_shards", 1)
                    .put("index.number_of_replicas", 0).build()).build();
            IndexSettings indexSettings = new IndexSettings(indexMetadata, Settings.EMPTY);

            // single-valued
            ft.setName("scaled_float1");
            IndexNumericFieldData fielddata = (IndexNumericFieldData) ft.fielddataBuilder().build(indexSettings, ft, null, null, null);
            AtomicNumericFieldData leafFieldData = fielddata.load(reader.leaves().get(0));
            SortedNumericDoubleValues values = leafFieldData.getDoubleValues();
            values.setDocument(0);
            assertEquals(1, values.count());
            assertEquals(10/ft.getScalingFactor(), values.valueAt(0), 10e-5);

            // multi-valued
            ft.setName("scaled_float2");
            fielddata = (IndexNumericFieldData) ft.fielddataBuilder().build(indexSettings, ft, null, null, null);
            leafFieldData = fielddata.load(reader.leaves().get(0));
            values = leafFieldData.getDoubleValues();
            values.setDocument(0);
            assertEquals(2, values.count());
            assertEquals(5/ft.getScalingFactor(), values.valueAt(0), 10e-5);
            assertEquals(12/ft.getScalingFactor(), values.valueAt(1), 10e-5);
        }
        IOUtils.close(w, dir);
    }
}
@@ -152,8 +152,9 @@ space. They can be compressed more aggressively by using the `best_compression`
 [float]
 === Use the smallest numeric type that is sufficient

-When storing <<number,numeric data>>, using `float` over `double`, or `half_float`
-over `float` can help save storage. This is also true for integer types, but less
-since Elasticsearch will more easily compress them based on the number of bits
-that they actually need.
+The type that you pick for <<number,numeric data>> can have a significant impact
+on disk usage. In particular, integers should be stored using an integer type
+(`byte`, `short`, `integer` or `long`) and floating points should either be
+stored in a `scaled_float` if appropriate or in the smallest type that fits the
+use-case: using `float` over `double`, or `half_float` over `float` will help
+save storage.

@@ -4,13 +4,14 @@
 The following numeric types are supported:

 [horizontal]
 `long`::         A signed 64-bit integer with a minimum value of +-2^63^+ and a maximum value of +2^63^-1+.
 `integer`::      A signed 32-bit integer with a minimum value of +-2^31^+ and a maximum value of +2^31^-1+.
 `short`::        A signed 16-bit integer with a minimum value of +-32,768+ and a maximum value of +32,767+.
 `byte`::         A signed 8-bit integer with a minimum value of +-128+ and a maximum value of +127+.
 `double`::       A double-precision 64-bit IEEE 754 floating point.
 `float`::        A single-precision 32-bit IEEE 754 floating point.
 `half_float`::   A half-precision 16-bit IEEE 754 floating point.
+`scaled_float`:: A floating point that is backed by a `long` and a fixed scaling factor.

 Below is an example of configuring a mapping with numeric fields:

@@ -26,6 +27,10 @@ PUT my_index
       },
       "time_in_seconds": {
         "type": "float"
+      },
+      "price": {
+        "type": "scaled_float",
+        "scaling_factor": 100
       }
     }
   }
@@ -42,11 +47,24 @@ help indexing and searching be more efficient. Note however that given that
 storage is optimized based on the actual values that are stored, picking one
 type over another one will have no impact on storage requirements.

-For floating-point types, picking the smallest type that is enough for the
-use-case will still help indexing and searching be more efficient. However,
-given that floating-point data is hard to compress, it might also have a
-significant impact on storage requirements. Here is a table that compares the
-3 floating-point types that are available in order to help make a decision.
+For floating-point types, it is often more efficient to store floating-point
+data into an integer using a scaling factor, which is what the `scaled_float`
+type does under the hood. For instance, a `price` field could be stored in a
+`scaled_float` with a `scaling_factor` of +100+. All APIs would work as if
+the field were stored as a double, but under the hood Elasticsearch would be
+working with the number of cents, +price*100+, which is an integer. This is
+mostly helpful to save disk space since integers are much easier to compress
+than floating points. `scaled_float` is also fine to use in order to trade
+accuracy for disk space. For instance, imagine that you are tracking cpu
+utilization as a number between +0+ and +1+. It usually does not matter much
+whether cpu utilization is +12.7%+ or +13%+, so you could use a `scaled_float`
+with a `scaling_factor` of +100+ to round cpu utilization to the closest
+percent and save space.
+
+If `scaled_float` is not a good fit, then you should pick the smallest type
+that is enough for the use-case among the floating-point types: `double`,
+`float` and `half_float`. Here is a table that compares these types in order
+to help make a decision.

 [cols="<,<,<,<",options="header",]
 |=======================================================================
@@ -56,12 +74,6 @@ significant impact on storage requirements. Here is a table that compares the
 |`half_float`|+2^-24^+ |+65504+ |+11+ / +3.31+
 |=======================================================================

-When possible, it is often more efficient to store floating-point data into an
-integer using a scaling factor. For instance, it is more efficient to store
-percentages as integers between 0 and 100 than as floating-point numbers between 0
-and 1. Another example would be prices: it will be more efficient to store prices
-as a number of cents, which is an integer, than as a floating-point number.

 [[number-params]]
 ==== Parameters for numeric fields

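The two examples in the added paragraphs above can be checked with a few lines of arithmetic; a hedged sketch (illustrative values and class name, not from the docs source):

```java
// Worked versions of the docs' two examples: prices stored as cents, and cpu
// utilization rounded to the closest percent. Values are illustrative.
public class ScalingExamples {
    public static void main(String[] args) {
        // price with scaling_factor 100: stored as a number of cents
        long cents = Math.round(19.99 * 100);    // 1999
        System.out.println(cents / 100.0);       // 19.99, no accuracy lost here

        // cpu utilization with scaling_factor 100: rounded to the closest percent
        long percent = Math.round(0.127 * 100);  // 13
        System.out.println(percent / 100.0);     // 0.13, i.e. 12.7% becomes 13%
    }
}
```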
||||
@ -114,4 +126,19 @@ The following parameters are accepted by numeric types:
|
||||
the <<mapping-source-field,`_source`>> field. Accepts `true` or `false`
|
||||
(default).
|
||||
|
||||
[[scaled-float-params]]
|
||||
==== Parameters for `scaled_float`
|
||||
|
||||
`scaled_float` accepts an additional parameter:
|
||||
|
||||
[horizontal]
|
||||
|
||||
`scaling_factor`::
|
||||
|
||||
The scaling factor to use when encoding values. Values will be multiplied
|
||||
by this factor at index time and rounded to the closest long value. For
|
||||
instance, a `scaled_float` with a `scaling_factor` of +10+ would internally
|
||||
store +2.34+ as +23+ and all search-time operations (queries, aggregations,
|
||||
sorting) will behave as if the document had a value of +2.3+. High values
|
||||
of `scaling_factor` improve accuracy but also increase space requirements.
|
||||
This parameter is required.
|
||||
|