mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-27 02:18:42 +00:00
Expose half-floats. #18887
Half-float points were implemented in Lucene in https://issues.apache.org/jira/browse/LUCENE-7289. Range queries are implemented so that accuracy loss only occurs at index time: when searching for values between A and B, the query matches exactly those documents whose value, rounded to the closest half-float, is between A and B.
This commit is contained in:
parent
18ff051ad5
commit
9ffb2ff6ba
@ -29,6 +29,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
|
|||||||
SHORT(false),
|
SHORT(false),
|
||||||
INT(false),
|
INT(false),
|
||||||
LONG(false),
|
LONG(false),
|
||||||
|
HALF_FLOAT(true),
|
||||||
FLOAT(true),
|
FLOAT(true),
|
||||||
DOUBLE(true);
|
DOUBLE(true);
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
package org.elasticsearch.index.fielddata.plain;
|
package org.elasticsearch.index.fielddata.plain;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.HalfFloatPoint;
|
||||||
import org.apache.lucene.index.DocValues;
|
import org.apache.lucene.index.DocValues;
|
||||||
import org.apache.lucene.index.DocValuesType;
|
import org.apache.lucene.index.DocValuesType;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
@ -61,6 +62,7 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
|
|||||||
@Override
|
@Override
|
||||||
public org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue, MultiValueMode sortMode, Nested nested) {
|
public org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue, MultiValueMode sortMode, Nested nested) {
|
||||||
switch (numericType) {
|
switch (numericType) {
|
||||||
|
case HALF_FLOAT:
|
||||||
case FLOAT:
|
case FLOAT:
|
||||||
return new FloatValuesComparatorSource(this, missingValue, sortMode, nested);
|
return new FloatValuesComparatorSource(this, missingValue, sortMode, nested);
|
||||||
case DOUBLE:
|
case DOUBLE:
|
||||||
@ -87,6 +89,8 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
|
|||||||
final String field = fieldName;
|
final String field = fieldName;
|
||||||
|
|
||||||
switch (numericType) {
|
switch (numericType) {
|
||||||
|
case HALF_FLOAT:
|
||||||
|
return new SortedNumericHalfFloatFieldData(reader, field);
|
||||||
case FLOAT:
|
case FLOAT:
|
||||||
return new SortedNumericFloatFieldData(reader, field);
|
return new SortedNumericFloatFieldData(reader, field);
|
||||||
case DOUBLE:
|
case DOUBLE:
|
||||||
@ -134,6 +138,95 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* FieldData implementation for 16-bit float values.
|
||||||
|
* <p>
|
||||||
|
* Order of values within a document is consistent with
|
||||||
|
* {@link Float#compareTo(Float)}, hence the following reversible
|
||||||
|
* transformation is applied at both index and search:
|
||||||
|
* {@code bits ^ (bits >> 15) & 0x7fff}
|
||||||
|
* <p>
|
||||||
|
* Although the API is multi-valued, most codecs in Lucene specialize
|
||||||
|
* for the case where documents have at most one value. In this case
|
||||||
|
* {@link FieldData#unwrapSingleton(SortedNumericDoubleValues)} will return
|
||||||
|
* the underlying single-valued NumericDoubleValues representation, and
|
||||||
|
* {@link FieldData#unwrapSingletonBits(SortedNumericDoubleValues)} will return
|
||||||
|
* a Bits matching documents that have a real value (as opposed to missing).
|
||||||
|
*/
|
||||||
|
static final class SortedNumericHalfFloatFieldData extends AtomicDoubleFieldData {
|
||||||
|
final LeafReader reader;
|
||||||
|
final String field;
|
||||||
|
|
||||||
|
SortedNumericHalfFloatFieldData(LeafReader reader, String field) {
|
||||||
|
super(0L);
|
||||||
|
this.reader = reader;
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedNumericDoubleValues getDoubleValues() {
|
||||||
|
try {
|
||||||
|
SortedNumericDocValues raw = DocValues.getSortedNumeric(reader, field);
|
||||||
|
|
||||||
|
NumericDocValues single = DocValues.unwrapSingleton(raw);
|
||||||
|
if (single != null) {
|
||||||
|
return FieldData.singleton(new SingleHalfFloatValues(single), DocValues.unwrapSingletonBits(raw));
|
||||||
|
} else {
|
||||||
|
return new MultiHalfFloatValues(raw);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new IllegalStateException("Cannot load doc values", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<Accountable> getChildResources() {
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wraps a NumericDocValues and exposes a single 16-bit float per document.
|
||||||
|
*/
|
||||||
|
static final class SingleHalfFloatValues extends NumericDoubleValues {
|
||||||
|
final NumericDocValues in;
|
||||||
|
|
||||||
|
SingleHalfFloatValues(NumericDocValues in) {
|
||||||
|
this.in = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double get(int docID) {
|
||||||
|
return HalfFloatPoint.sortableShortToHalfFloat((short) in.get(docID));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wraps a SortedNumericDocValues and exposes multiple 16-bit floats per document.
|
||||||
|
*/
|
||||||
|
static final class MultiHalfFloatValues extends SortedNumericDoubleValues {
|
||||||
|
final SortedNumericDocValues in;
|
||||||
|
|
||||||
|
MultiHalfFloatValues(SortedNumericDocValues in) {
|
||||||
|
this.in = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setDocument(int doc) {
|
||||||
|
in.setDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double valueAt(int index) {
|
||||||
|
return HalfFloatPoint.sortableShortToHalfFloat((short) in.valueAt(index));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int count() {
|
||||||
|
return in.count();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* FieldData implementation for 32-bit float values.
|
* FieldData implementation for 32-bit float values.
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.core;
|
|||||||
import org.apache.lucene.document.DoublePoint;
|
import org.apache.lucene.document.DoublePoint;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FloatPoint;
|
import org.apache.lucene.document.FloatPoint;
|
||||||
|
import org.apache.lucene.document.HalfFloatPoint;
|
||||||
import org.apache.lucene.document.IntPoint;
|
import org.apache.lucene.document.IntPoint;
|
||||||
import org.apache.lucene.document.LongPoint;
|
import org.apache.lucene.document.LongPoint;
|
||||||
import org.apache.lucene.document.SortedNumericDocValuesField;
|
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||||
@ -180,6 +181,86 @@ public class NumberFieldMapper extends FieldMapper implements AllFieldMapper.Inc
|
|||||||
}
|
}
|
||||||
|
|
||||||
public enum NumberType {
|
public enum NumberType {
|
||||||
|
HALF_FLOAT("half_float", NumericType.HALF_FLOAT) {
|
||||||
|
@Override
|
||||||
|
Float parse(Object value) {
|
||||||
|
return (Float) FLOAT.parse(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
Float parse(XContentParser parser, boolean coerce) throws IOException {
|
||||||
|
return parser.floatValue(coerce);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
Query termQuery(String field, Object value) {
|
||||||
|
float v = parse(value);
|
||||||
|
return HalfFloatPoint.newExactQuery(field, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
Query termsQuery(String field, List<Object> values) {
|
||||||
|
float[] v = new float[values.size()];
|
||||||
|
for (int i = 0; i < values.size(); ++i) {
|
||||||
|
v[i] = parse(values.get(i));
|
||||||
|
}
|
||||||
|
return HalfFloatPoint.newSetQuery(field, v);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
Query rangeQuery(String field, Object lowerTerm, Object upperTerm,
|
||||||
|
boolean includeLower, boolean includeUpper) {
|
||||||
|
float l = Float.NEGATIVE_INFINITY;
|
||||||
|
float u = Float.POSITIVE_INFINITY;
|
||||||
|
if (lowerTerm != null) {
|
||||||
|
l = parse(lowerTerm);
|
||||||
|
if (includeLower) {
|
||||||
|
l = Math.nextDown(l);
|
||||||
|
}
|
||||||
|
l = HalfFloatPoint.nextUp(l);
|
||||||
|
}
|
||||||
|
if (upperTerm != null) {
|
||||||
|
u = parse(upperTerm);
|
||||||
|
if (includeUpper) {
|
||||||
|
u = Math.nextUp(u);
|
||||||
|
}
|
||||||
|
u = HalfFloatPoint.nextDown(u);
|
||||||
|
}
|
||||||
|
return HalfFloatPoint.newRangeQuery(field, l, u);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Field> createFields(String name, Number value,
|
||||||
|
boolean indexed, boolean docValued, boolean stored) {
|
||||||
|
List<Field> fields = new ArrayList<>();
|
||||||
|
if (indexed) {
|
||||||
|
fields.add(new HalfFloatPoint(name, value.floatValue()));
|
||||||
|
}
|
||||||
|
if (docValued) {
|
||||||
|
fields.add(new SortedNumericDocValuesField(name,
|
||||||
|
HalfFloatPoint.halfFloatToSortableShort(value.floatValue())));
|
||||||
|
}
|
||||||
|
if (stored) {
|
||||||
|
fields.add(new StoredField(name, value.floatValue()));
|
||||||
|
}
|
||||||
|
return fields;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
FieldStats.Double stats(IndexReader reader, String fieldName,
|
||||||
|
boolean isSearchable, boolean isAggregatable) throws IOException {
|
||||||
|
long size = XPointValues.size(reader, fieldName);
|
||||||
|
if (size == 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
int docCount = XPointValues.getDocCount(reader, fieldName);
|
||||||
|
byte[] min = XPointValues.getMinPackedValue(reader, fieldName);
|
||||||
|
byte[] max = XPointValues.getMaxPackedValue(reader, fieldName);
|
||||||
|
return new FieldStats.Double(reader.maxDoc(),docCount, -1L, size,
|
||||||
|
isSearchable, isAggregatable,
|
||||||
|
HalfFloatPoint.decodeDimension(min, 0), HalfFloatPoint.decodeDimension(max, 0));
|
||||||
|
}
|
||||||
|
},
|
||||||
FLOAT("float", NumericType.FLOAT) {
|
FLOAT("float", NumericType.FLOAT) {
|
||||||
@Override
|
@Override
|
||||||
Float parse(Object value) {
|
Float parse(Object value) {
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
package org.elasticsearch.fieldstats;
|
package org.elasticsearch.fieldstats;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.HalfFloatPoint;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.action.ActionRequestValidationException;
|
import org.elasticsearch.action.ActionRequestValidationException;
|
||||||
import org.elasticsearch.action.fieldstats.FieldStats;
|
import org.elasticsearch.action.fieldstats.FieldStats;
|
||||||
@ -54,6 +55,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
|||||||
"string", "type=text",
|
"string", "type=text",
|
||||||
"date", "type=date",
|
"date", "type=date",
|
||||||
"double", "type=double",
|
"double", "type=double",
|
||||||
|
"half_float", "type=half_float",
|
||||||
"float", "type=float",
|
"float", "type=float",
|
||||||
"long", "type=long",
|
"long", "type=long",
|
||||||
"integer", "type=integer",
|
"integer", "type=integer",
|
||||||
@ -67,6 +69,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
|||||||
"string", "type=text,index=false",
|
"string", "type=text,index=false",
|
||||||
"date", "type=date,index=false",
|
"date", "type=date,index=false",
|
||||||
"double", "type=double,index=false",
|
"double", "type=double,index=false",
|
||||||
|
"half_float", "type=half_float",
|
||||||
"float", "type=float,index=false",
|
"float", "type=float,index=false",
|
||||||
"long", "type=long,index=false",
|
"long", "type=long,index=false",
|
||||||
"integer", "type=integer,index=false",
|
"integer", "type=integer,index=false",
|
||||||
@ -81,6 +84,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
|||||||
"string", "type=text,index=false",
|
"string", "type=text,index=false",
|
||||||
"date", "type=date,index=false",
|
"date", "type=date,index=false",
|
||||||
"double", "type=double,index=false",
|
"double", "type=double,index=false",
|
||||||
|
"half_float", "type=half_float",
|
||||||
"float", "type=float,index=false",
|
"float", "type=float,index=false",
|
||||||
"long", "type=long,index=false",
|
"long", "type=long,index=false",
|
||||||
"integer", "type=integer,index=false",
|
"integer", "type=integer,index=false",
|
||||||
@ -97,10 +101,12 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
|||||||
long maxInt = Integer.MIN_VALUE;
|
long maxInt = Integer.MIN_VALUE;
|
||||||
long minLong = Long.MAX_VALUE;
|
long minLong = Long.MAX_VALUE;
|
||||||
long maxLong = Long.MIN_VALUE;
|
long maxLong = Long.MIN_VALUE;
|
||||||
double minFloat = Float.MAX_VALUE;
|
double minHalfFloat = Double.POSITIVE_INFINITY;
|
||||||
double maxFloat = Float.MIN_VALUE;
|
double maxHalfFloat = Double.NEGATIVE_INFINITY;
|
||||||
double minDouble = Double.MAX_VALUE;
|
double minFloat = Double.POSITIVE_INFINITY;
|
||||||
double maxDouble = Double.MIN_VALUE;
|
double maxFloat = Double.NEGATIVE_INFINITY;
|
||||||
|
double minDouble = Double.POSITIVE_INFINITY;
|
||||||
|
double maxDouble = Double.NEGATIVE_INFINITY;
|
||||||
String minString = new String(Character.toChars(1114111));
|
String minString = new String(Character.toChars(1114111));
|
||||||
String maxString = "0";
|
String maxString = "0";
|
||||||
|
|
||||||
@ -119,6 +125,10 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
|||||||
long l = randomLong();
|
long l = randomLong();
|
||||||
minLong = Math.min(minLong, l);
|
minLong = Math.min(minLong, l);
|
||||||
maxLong = Math.max(maxLong, l);
|
maxLong = Math.max(maxLong, l);
|
||||||
|
float hf = randomFloat();
|
||||||
|
hf = HalfFloatPoint.sortableShortToHalfFloat(HalfFloatPoint.halfFloatToSortableShort(hf));
|
||||||
|
minHalfFloat = Math.min(minHalfFloat, hf);
|
||||||
|
maxHalfFloat = Math.max(maxHalfFloat, hf);
|
||||||
float f = randomFloat();
|
float f = randomFloat();
|
||||||
minFloat = Math.min(minFloat, f);
|
minFloat = Math.min(minFloat, f);
|
||||||
maxFloat = Math.max(maxFloat, f);
|
maxFloat = Math.max(maxFloat, f);
|
||||||
@ -138,6 +148,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
|||||||
"short", s,
|
"short", s,
|
||||||
"integer", i,
|
"integer", i,
|
||||||
"long", l,
|
"long", l,
|
||||||
|
"half_float", hf,
|
||||||
"float", f,
|
"float", f,
|
||||||
"double", d,
|
"double", d,
|
||||||
"string", str)
|
"string", str)
|
||||||
@ -147,7 +158,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
|||||||
|
|
||||||
FieldStatsResponse response = client()
|
FieldStatsResponse response = client()
|
||||||
.prepareFieldStats()
|
.prepareFieldStats()
|
||||||
.setFields("byte", "short", "integer", "long", "float", "double", "string").get();
|
.setFields("byte", "short", "integer", "long", "half_float", "float", "double", "string").get();
|
||||||
assertAllSuccessful(response);
|
assertAllSuccessful(response);
|
||||||
|
|
||||||
for (FieldStats<?> stats : response.getAllFieldStats().values()) {
|
for (FieldStats<?> stats : response.getAllFieldStats().values()) {
|
||||||
@ -164,6 +175,8 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
|||||||
assertThat(response.getAllFieldStats().get("integer").getMaxValue(), equalTo(maxInt));
|
assertThat(response.getAllFieldStats().get("integer").getMaxValue(), equalTo(maxInt));
|
||||||
assertThat(response.getAllFieldStats().get("long").getMinValue(), equalTo(minLong));
|
assertThat(response.getAllFieldStats().get("long").getMinValue(), equalTo(minLong));
|
||||||
assertThat(response.getAllFieldStats().get("long").getMaxValue(), equalTo(maxLong));
|
assertThat(response.getAllFieldStats().get("long").getMaxValue(), equalTo(maxLong));
|
||||||
|
assertThat(response.getAllFieldStats().get("half_float").getMinValue(), equalTo(minHalfFloat));
|
||||||
|
assertThat(response.getAllFieldStats().get("half_float").getMaxValue(), equalTo(maxHalfFloat));
|
||||||
assertThat(response.getAllFieldStats().get("float").getMinValue(), equalTo(minFloat));
|
assertThat(response.getAllFieldStats().get("float").getMinValue(), equalTo(minFloat));
|
||||||
assertThat(response.getAllFieldStats().get("float").getMaxValue(), equalTo(maxFloat));
|
assertThat(response.getAllFieldStats().get("float").getMaxValue(), equalTo(maxFloat));
|
||||||
assertThat(response.getAllFieldStats().get("double").getMinValue(), equalTo(minDouble));
|
assertThat(response.getAllFieldStats().get("double").getMinValue(), equalTo(minDouble));
|
||||||
|
@ -108,6 +108,24 @@ public class FieldStatsTests extends ESSingleNodeTestCase {
|
|||||||
assertThat(result.getAllFieldStats().get(fieldName).getMinValueAsString(), equalTo(Double.toString(-1)));
|
assertThat(result.getAllFieldStats().get(fieldName).getMinValueAsString(), equalTo(Double.toString(-1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testHalfFloat() {
|
||||||
|
String fieldName = "field";
|
||||||
|
createIndex("test", Settings.EMPTY, "test", fieldName, "type=half_float");
|
||||||
|
for (float value = -1; value <= 9; value++) {
|
||||||
|
client().prepareIndex("test", "test").setSource(fieldName, value).get();
|
||||||
|
}
|
||||||
|
client().admin().indices().prepareRefresh().get();
|
||||||
|
|
||||||
|
FieldStatsResponse result = client().prepareFieldStats().setFields(fieldName).get();
|
||||||
|
assertThat(result.getAllFieldStats().get(fieldName).getMaxDoc(), equalTo(11L));
|
||||||
|
assertThat(result.getAllFieldStats().get(fieldName).getDocCount(), equalTo(11L));
|
||||||
|
assertThat(result.getAllFieldStats().get(fieldName).getDensity(), equalTo(100));
|
||||||
|
assertThat(result.getAllFieldStats().get(fieldName).getMinValue(), equalTo(-1d));
|
||||||
|
assertThat(result.getAllFieldStats().get(fieldName).getMaxValue(), equalTo(9d));
|
||||||
|
assertThat(result.getAllFieldStats().get(fieldName).getMinValueAsString(), equalTo(Float.toString(-1)));
|
||||||
|
assertThat(result.getAllFieldStats().get(fieldName).getMaxValueAsString(), equalTo(Float.toString(9)));
|
||||||
|
}
|
||||||
|
|
||||||
public void testFloat() {
|
public void testFloat() {
|
||||||
String fieldName = "field";
|
String fieldName = "field";
|
||||||
createIndex("test", Settings.EMPTY, "test", fieldName, "type=float");
|
createIndex("test", Settings.EMPTY, "test", fieldName, "type=float");
|
||||||
|
@ -0,0 +1,82 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.fielddata.plain;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
import org.apache.lucene.index.IndexableField;
|
||||||
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
import org.elasticsearch.index.fielddata.FieldData;
|
||||||
|
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
|
||||||
|
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
|
||||||
|
import org.elasticsearch.test.ESTestCase;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class HalfFloatFielddataTests extends ESTestCase {
|
||||||
|
|
||||||
|
public void testSingleValued() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
// we need the default codec to check for singletons
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setCodec(TestUtil.getDefaultCodec()));
|
||||||
|
Document doc = new Document();
|
||||||
|
for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 3f, false, true, false)) {
|
||||||
|
doc.add(f);
|
||||||
|
}
|
||||||
|
w.addDocument(doc);
|
||||||
|
final DirectoryReader dirReader = DirectoryReader.open(w);
|
||||||
|
LeafReader reader = getOnlyLeafReader(dirReader);
|
||||||
|
SortedNumericDoubleValues values = new SortedNumericDVIndexFieldData.SortedNumericHalfFloatFieldData(
|
||||||
|
reader, "half_float").getDoubleValues();
|
||||||
|
assertNotNull(FieldData.unwrapSingleton(values));
|
||||||
|
values.setDocument(0);
|
||||||
|
assertEquals(1, values.count());
|
||||||
|
assertEquals(3f, values.valueAt(0), 0f);
|
||||||
|
IOUtils.close(dirReader, w, dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testMultiValued() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
|
||||||
|
Document doc = new Document();
|
||||||
|
for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 3f, false, true, false)) {
|
||||||
|
doc.add(f);
|
||||||
|
}
|
||||||
|
for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 2f, false, true, false)) {
|
||||||
|
doc.add(f);
|
||||||
|
}
|
||||||
|
w.addDocument(doc);
|
||||||
|
final DirectoryReader dirReader = DirectoryReader.open(w);
|
||||||
|
LeafReader reader = getOnlyLeafReader(dirReader);
|
||||||
|
SortedNumericDoubleValues values = new SortedNumericDVIndexFieldData.SortedNumericHalfFloatFieldData(
|
||||||
|
reader, "half_float").getDoubleValues();
|
||||||
|
assertNull(FieldData.unwrapSingleton(values));
|
||||||
|
values.setDocument(0);
|
||||||
|
assertEquals(2, values.count());
|
||||||
|
assertEquals(2f, values.valueAt(0), 0f);
|
||||||
|
assertEquals(3f, values.valueAt(1), 0f);
|
||||||
|
IOUtils.close(dirReader, w, dir);
|
||||||
|
}
|
||||||
|
}
|
@ -21,8 +21,18 @@ package org.elasticsearch.index.mapper.core;
|
|||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.FloatPoint;
|
||||||
|
import org.apache.lucene.document.HalfFloatPoint;
|
||||||
import org.apache.lucene.document.LongPoint;
|
import org.apache.lucene.document.LongPoint;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.elasticsearch.index.mapper.FieldTypeTestCase;
|
import org.elasticsearch.index.mapper.FieldTypeTestCase;
|
||||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||||
import org.elasticsearch.index.mapper.MappedFieldType.Relation;
|
import org.elasticsearch.index.mapper.MappedFieldType.Relation;
|
||||||
@ -81,6 +91,7 @@ public class NumberFieldTypeTests extends FieldTypeTestCase {
|
|||||||
assertEquals((short) 3, NumberType.SHORT.parse(3d));
|
assertEquals((short) 3, NumberType.SHORT.parse(3d));
|
||||||
assertEquals(3, NumberType.INTEGER.parse(3d));
|
assertEquals(3, NumberType.INTEGER.parse(3d));
|
||||||
assertEquals(3L, NumberType.LONG.parse(3d));
|
assertEquals(3L, NumberType.LONG.parse(3d));
|
||||||
|
assertEquals(3f, NumberType.HALF_FLOAT.parse(3d));
|
||||||
assertEquals(3f, NumberType.FLOAT.parse(3d));
|
assertEquals(3f, NumberType.FLOAT.parse(3d));
|
||||||
assertEquals(3d, NumberType.DOUBLE.parse(3d));
|
assertEquals(3d, NumberType.DOUBLE.parse(3d));
|
||||||
|
|
||||||
@ -103,7 +114,39 @@ public class NumberFieldTypeTests extends FieldTypeTestCase {
|
|||||||
assertEquals("Value [2147483648] is out of range for an integer", e.getMessage());
|
assertEquals("Value [2147483648] is out of range for an integer", e.getMessage());
|
||||||
e = expectThrows(IllegalArgumentException.class, () -> NumberType.LONG.parse(10000000000000000000d));
|
e = expectThrows(IllegalArgumentException.class, () -> NumberType.LONG.parse(10000000000000000000d));
|
||||||
assertEquals("Value [1.0E19] is out of range for a long", e.getMessage());
|
assertEquals("Value [1.0E19] is out of range for a long", e.getMessage());
|
||||||
assertEquals(1.1f, NumberType.FLOAT.parse(1.1)); // accuracy loss is expected
|
assertEquals(1.1f, NumberType.HALF_FLOAT.parse(1.1));
|
||||||
|
assertEquals(1.1f, NumberType.FLOAT.parse(1.1));
|
||||||
assertEquals(1.1d, NumberType.DOUBLE.parse(1.1));
|
assertEquals(1.1d, NumberType.DOUBLE.parse(1.1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testHalfFloatRange() throws IOException {
|
||||||
|
// make sure the accuracy loss of half floats only occurs at index time
|
||||||
|
// this test checks that searching half floats yields the same results as
|
||||||
|
// searching floats that are rounded to the closest half float
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
|
||||||
|
final int numDocs = 10000;
|
||||||
|
for (int i = 0; i < numDocs; ++i) {
|
||||||
|
Document doc = new Document();
|
||||||
|
float value = (randomFloat() * 2 - 1) * 70000;
|
||||||
|
float rounded = HalfFloatPoint.sortableShortToHalfFloat(HalfFloatPoint.halfFloatToSortableShort(value));
|
||||||
|
doc.add(new HalfFloatPoint("half_float", value));
|
||||||
|
doc.add(new FloatPoint("float", rounded));
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
final DirectoryReader reader = DirectoryReader.open(w);
|
||||||
|
w.close();
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
final int numQueries = 1000;
|
||||||
|
for (int i = 0; i < numQueries; ++i) {
|
||||||
|
float l = (randomFloat() * 2 - 1) * 70000;
|
||||||
|
float u = (randomFloat() * 2 - 1) * 70000;
|
||||||
|
boolean includeLower = randomBoolean();
|
||||||
|
boolean includeUpper = randomBoolean();
|
||||||
|
Query floatQ = NumberFieldMapper.NumberType.FLOAT.rangeQuery("float", l, u, includeLower, includeUpper);
|
||||||
|
Query halfFloatQ = NumberFieldMapper.NumberType.HALF_FLOAT.rangeQuery("half_float", l, u, includeLower, includeUpper);
|
||||||
|
assertEquals(searcher.count(floatQ), searcher.count(halfFloatQ));
|
||||||
|
}
|
||||||
|
IOUtils.close(reader, dir);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,12 +4,13 @@
|
|||||||
The following numeric types are supported:
|
The following numeric types are supported:
|
||||||
|
|
||||||
[horizontal]
|
[horizontal]
|
||||||
`long`:: A signed 64-bit integer with a minimum value of +-2^63^+ and a maximum value of +2^63^-1+.
|
`long`:: A signed 64-bit integer with a minimum value of +-2^63^+ and a maximum value of +2^63^-1+.
|
||||||
`integer`:: A signed 32-bit integer with a minimum value of +-2^31^+ and a maximum value of +2^31^-1+.
|
`integer`:: A signed 32-bit integer with a minimum value of +-2^31^+ and a maximum value of +2^31^-1+.
|
||||||
`short`:: A signed 16-bit integer with a minimum value of +-32,768+ and a maximum value of +32,767+.
|
`short`:: A signed 16-bit integer with a minimum value of +-32,768+ and a maximum value of +32,767+.
|
||||||
`byte`:: A signed 8-bit integer with a minimum value of +-128+ and a maximum value of +127+.
|
`byte`:: A signed 8-bit integer with a minimum value of +-128+ and a maximum value of +127+.
|
||||||
`double`:: A double-precision 64-bit IEEE 754 floating point.
|
`double`:: A double-precision 64-bit IEEE 754 floating point.
|
||||||
`float`:: A single-precision 32-bit IEEE 754 floating point.
|
`float`:: A single-precision 32-bit IEEE 754 floating point.
|
||||||
|
`half_float`:: A half-precision 16-bit IEEE 754 floating point.
|
||||||
|
|
||||||
Below is an example of configuring a mapping with numeric fields:
|
Below is an example of configuring a mapping with numeric fields:
|
||||||
|
|
||||||
@ -33,6 +34,34 @@ PUT my_index
|
|||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
// CONSOLE
|
// CONSOLE
|
||||||
|
|
||||||
|
==== Which type should I use?
|
||||||
|
|
||||||
|
As far as integer types (`byte`, `short`, `integer` and `long`) are concerned,
|
||||||
|
you should pick the smallest type which is enough for your use-case. This will
|
||||||
|
make indexing and searching more efficient. Note, however, that because
|
||||||
|
storage is optimized based on the actual values that are stored, picking one
|
||||||
|
type over another one will have no impact on storage requirements.
|
||||||
|
|
||||||
|
For floating-point types, picking the smallest type that is enough for the
|
||||||
|
use-case will still help indexing and searching be more efficient. However,
|
||||||
|
given that floating-point data is hard to compress, it might also have a
|
||||||
|
significant impact on storage requirements. Here is a table that compares the
|
||||||
|
three available floating-point types to help you make a decision.
|
||||||
|
|
||||||
|
[cols="<,<,<,<",options="header",]
|
||||||
|
|=======================================================================
|
||||||
|
|Type |Minimum positive value |Maximum value |Significant bits / digits
|
||||||
|
|`double`|+2^-1074^+ |+(2-2^-52^)·2^1023^+ |+53+ / +15.95+
|
||||||
|
|`float`|+2^-149^+ |+(2-2^-23^)·2^127^+ |+24+ / +7.22+
|
||||||
|
|`half_float`|+2^-24^+ |+65504+ |+11+ / +3.31+
|
||||||
|
|=======================================================================
|
||||||
|
|
||||||
|
When possible, it is often more efficient to store floating-point data as an
|
||||||
|
integer using a scaling factor. For instance, it is more efficient to store
|
||||||
|
percentages as integers between 0 and 100 than as floating-point numbers between 0
|
||||||
|
and 1. Another example would be prices: it will be more efficient to store prices
|
||||||
|
as a number of cents, which is an integer, than as a floating-point number.
|
||||||
|
|
||||||
[[number-params]]
|
[[number-params]]
|
||||||
==== Parameters for numeric fields
|
==== Parameters for numeric fields
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user