mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-25 01:19:02 +00:00
Expose half-floats. #18887
They have been implemented in https://issues.apache.org/jira/browse/LUCENE-7289. Ranges are implemented so that the accuracy loss only occurs at index time, which means that if you are searching for values between A and B, the query will match exactly all documents whose value rounded to the closest half-float point is between A and B.
This commit is contained in:
parent
18ff051ad5
commit
9ffb2ff6ba
@ -29,6 +29,7 @@ public interface IndexNumericFieldData extends IndexFieldData<AtomicNumericField
|
||||
SHORT(false),
|
||||
INT(false),
|
||||
LONG(false),
|
||||
HALF_FLOAT(true),
|
||||
FLOAT(true),
|
||||
DOUBLE(true);
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import org.apache.lucene.document.HalfFloatPoint;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
@ -61,6 +62,7 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
|
||||
@Override
|
||||
public org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue, MultiValueMode sortMode, Nested nested) {
|
||||
switch (numericType) {
|
||||
case HALF_FLOAT:
|
||||
case FLOAT:
|
||||
return new FloatValuesComparatorSource(this, missingValue, sortMode, nested);
|
||||
case DOUBLE:
|
||||
@ -87,6 +89,8 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
|
||||
final String field = fieldName;
|
||||
|
||||
switch (numericType) {
|
||||
case HALF_FLOAT:
|
||||
return new SortedNumericHalfFloatFieldData(reader, field);
|
||||
case FLOAT:
|
||||
return new SortedNumericFloatFieldData(reader, field);
|
||||
case DOUBLE:
|
||||
@ -134,6 +138,95 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* FieldData implementation for 16-bit float values.
|
||||
* <p>
|
||||
* Order of values within a document is consistent with
|
||||
* {@link Float#compareTo(Float)}, hence the following reversible
|
||||
* transformation is applied at both index and search:
|
||||
* {@code bits ^ (bits >> 15) & 0x7fff}
|
||||
* <p>
|
||||
* Although the API is multi-valued, most codecs in Lucene specialize
|
||||
* for the case where documents have at most one value. In this case
|
||||
* {@link FieldData#unwrapSingleton(SortedNumericDoubleValues)} will return
|
||||
* the underlying single-valued NumericDoubleValues representation, and
|
||||
* {@link FieldData#unwrapSingletonBits(SortedNumericDoubleValues)} will return
|
||||
* a Bits matching documents that have a real value (as opposed to missing).
|
||||
*/
|
||||
static final class SortedNumericHalfFloatFieldData extends AtomicDoubleFieldData {
|
||||
final LeafReader reader;
|
||||
final String field;
|
||||
|
||||
SortedNumericHalfFloatFieldData(LeafReader reader, String field) {
|
||||
super(0L);
|
||||
this.reader = reader;
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedNumericDoubleValues getDoubleValues() {
|
||||
try {
|
||||
SortedNumericDocValues raw = DocValues.getSortedNumeric(reader, field);
|
||||
|
||||
NumericDocValues single = DocValues.unwrapSingleton(raw);
|
||||
if (single != null) {
|
||||
return FieldData.singleton(new SingleHalfFloatValues(single), DocValues.unwrapSingletonBits(raw));
|
||||
} else {
|
||||
return new MultiHalfFloatValues(raw);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new IllegalStateException("Cannot load doc values", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<Accountable> getChildResources() {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps a NumericDocValues and exposes a single 16-bit float per document.
|
||||
*/
|
||||
static final class SingleHalfFloatValues extends NumericDoubleValues {
|
||||
final NumericDocValues in;
|
||||
|
||||
SingleHalfFloatValues(NumericDocValues in) {
|
||||
this.in = in;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double get(int docID) {
|
||||
return HalfFloatPoint.sortableShortToHalfFloat((short) in.get(docID));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps a SortedNumericDocValues and exposes multiple 16-bit floats per document.
|
||||
*/
|
||||
static final class MultiHalfFloatValues extends SortedNumericDoubleValues {
|
||||
final SortedNumericDocValues in;
|
||||
|
||||
MultiHalfFloatValues(SortedNumericDocValues in) {
|
||||
this.in = in;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setDocument(int doc) {
|
||||
in.setDocument(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public double valueAt(int index) {
|
||||
return HalfFloatPoint.sortableShortToHalfFloat((short) in.valueAt(index));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int count() {
|
||||
return in.count();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* FieldData implementation for 32-bit float values.
|
||||
* <p>
|
||||
|
@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper.core;
|
||||
import org.apache.lucene.document.DoublePoint;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FloatPoint;
|
||||
import org.apache.lucene.document.HalfFloatPoint;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||
@ -180,6 +181,86 @@ public class NumberFieldMapper extends FieldMapper implements AllFieldMapper.Inc
|
||||
}
|
||||
|
||||
public enum NumberType {
|
||||
/**
 * 16-bit IEEE 754 floating point ({@code half_float}).
 * <p>
 * Values are handled as {@code float} at the API level; the narrowing to half
 * precision is delegated to {@link HalfFloatPoint} when fields and queries are built.
 */
HALF_FLOAT("half_float", NumericType.HALF_FLOAT) {
    /** Parses exactly like {@code FLOAT}; no rounding to half precision happens here. */
    @Override
    Float parse(Object value) {
        return (Float) FLOAT.parse(value);
    }

    /** Reads the current token of {@code parser} as a float, honouring {@code coerce}. */
    @Override
    Float parse(XContentParser parser, boolean coerce) throws IOException {
        return parser.floatValue(coerce);
    }

    /** Builds an exact-match point query for the parsed {@code value}. */
    @Override
    Query termQuery(String field, Object value) {
        final float point = parse(value);
        return HalfFloatPoint.newExactQuery(field, point);
    }

    /** Builds a set query matching any of the parsed {@code values}. */
    @Override
    Query termsQuery(String field, List<Object> values) {
        final float[] points = new float[values.size()];
        int slot = 0;
        for (Object value : values) {
            points[slot++] = parse(value);
        }
        return HalfFloatPoint.newSetQuery(field, points);
    }

    /**
     * Builds a range query over half-float points.
     * <p>
     * A {@code null} bound leaves that side open (negative / positive infinity).
     * An inclusive bound is first moved one float step outwards, then every
     * non-null bound is passed through {@link HalfFloatPoint#nextUp(float)}
     * (lower) or {@link HalfFloatPoint#nextDown(float)} (upper).
     * NOTE(review): those helpers presumably return the adjacent representable
     * half float, which would make the bounds exact in half-float space - confirm.
     */
    @Override
    Query rangeQuery(String field, Object lowerTerm, Object upperTerm,
                     boolean includeLower, boolean includeUpper) {
        float lower = Float.NEGATIVE_INFINITY;
        if (lowerTerm != null) {
            final float parsedLower = parse(lowerTerm);
            lower = HalfFloatPoint.nextUp(includeLower ? Math.nextDown(parsedLower) : parsedLower);
        }

        float upper = Float.POSITIVE_INFINITY;
        if (upperTerm != null) {
            final float parsedUpper = parse(upperTerm);
            upper = HalfFloatPoint.nextDown(includeUpper ? Math.nextUp(parsedUpper) : parsedUpper);
        }

        return HalfFloatPoint.newRangeQuery(field, lower, upper);
    }

    /**
     * Creates the Lucene fields for one value.
     *
     * @param name      field name
     * @param value     value to index; converted with {@link Number#floatValue()}
     * @param indexed   add a {@link HalfFloatPoint}
     * @param docValued add a {@link SortedNumericDocValuesField} holding the sortable-short encoding
     * @param stored    add a {@link StoredField} holding the float value
     * @return the created fields, possibly empty
     */
    @Override
    public List<Field> createFields(String name, Number value,
                                    boolean indexed, boolean docValued, boolean stored) {
        final List<Field> created = new ArrayList<>();
        if (indexed) {
            created.add(new HalfFloatPoint(name, value.floatValue()));
        }
        if (docValued) {
            final long sortable = HalfFloatPoint.halfFloatToSortableShort(value.floatValue());
            created.add(new SortedNumericDocValuesField(name, sortable));
        }
        if (stored) {
            created.add(new StoredField(name, value.floatValue()));
        }
        return created;
    }

    /**
     * Computes field statistics from the point values of {@code fieldName}.
     *
     * @return the statistics, or {@code null} when the field has no points
     */
    @Override
    FieldStats.Double stats(IndexReader reader, String fieldName,
                            boolean isSearchable, boolean isAggregatable) throws IOException {
        final long pointCount = XPointValues.size(reader, fieldName);
        if (pointCount == 0) {
            return null;
        }
        final int docsWithField = XPointValues.getDocCount(reader, fieldName);
        final byte[] packedMin = XPointValues.getMinPackedValue(reader, fieldName);
        final byte[] packedMax = XPointValues.getMaxPackedValue(reader, fieldName);
        return new FieldStats.Double(
                reader.maxDoc(), docsWithField, -1L, pointCount,
                isSearchable, isAggregatable,
                HalfFloatPoint.decodeDimension(packedMin, 0),
                HalfFloatPoint.decodeDimension(packedMax, 0));
    }
},
|
||||
FLOAT("float", NumericType.FLOAT) {
|
||||
@Override
|
||||
Float parse(Object value) {
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
package org.elasticsearch.fieldstats;
|
||||
|
||||
import org.apache.lucene.document.HalfFloatPoint;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.action.ActionRequestValidationException;
|
||||
import org.elasticsearch.action.fieldstats.FieldStats;
|
||||
@ -54,6 +55,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
||||
"string", "type=text",
|
||||
"date", "type=date",
|
||||
"double", "type=double",
|
||||
"half_float", "type=half_float",
|
||||
"float", "type=float",
|
||||
"long", "type=long",
|
||||
"integer", "type=integer",
|
||||
@ -67,6 +69,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
||||
"string", "type=text,index=false",
|
||||
"date", "type=date,index=false",
|
||||
"double", "type=double,index=false",
|
||||
"half_float", "type=half_float",
|
||||
"float", "type=float,index=false",
|
||||
"long", "type=long,index=false",
|
||||
"integer", "type=integer,index=false",
|
||||
@ -81,6 +84,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
||||
"string", "type=text,index=false",
|
||||
"date", "type=date,index=false",
|
||||
"double", "type=double,index=false",
|
||||
"half_float", "type=half_float",
|
||||
"float", "type=float,index=false",
|
||||
"long", "type=long,index=false",
|
||||
"integer", "type=integer,index=false",
|
||||
@ -97,10 +101,12 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
||||
long maxInt = Integer.MIN_VALUE;
|
||||
long minLong = Long.MAX_VALUE;
|
||||
long maxLong = Long.MIN_VALUE;
|
||||
double minFloat = Float.MAX_VALUE;
|
||||
double maxFloat = Float.MIN_VALUE;
|
||||
double minDouble = Double.MAX_VALUE;
|
||||
double maxDouble = Double.MIN_VALUE;
|
||||
double minHalfFloat = Double.POSITIVE_INFINITY;
|
||||
double maxHalfFloat = Double.NEGATIVE_INFINITY;
|
||||
double minFloat = Double.POSITIVE_INFINITY;
|
||||
double maxFloat = Double.NEGATIVE_INFINITY;
|
||||
double minDouble = Double.POSITIVE_INFINITY;
|
||||
double maxDouble = Double.NEGATIVE_INFINITY;
|
||||
String minString = new String(Character.toChars(1114111));
|
||||
String maxString = "0";
|
||||
|
||||
@ -119,6 +125,10 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
||||
long l = randomLong();
|
||||
minLong = Math.min(minLong, l);
|
||||
maxLong = Math.max(maxLong, l);
|
||||
float hf = randomFloat();
|
||||
hf = HalfFloatPoint.sortableShortToHalfFloat(HalfFloatPoint.halfFloatToSortableShort(hf));
|
||||
minHalfFloat = Math.min(minHalfFloat, hf);
|
||||
maxHalfFloat = Math.max(maxHalfFloat, hf);
|
||||
float f = randomFloat();
|
||||
minFloat = Math.min(minFloat, f);
|
||||
maxFloat = Math.max(maxFloat, f);
|
||||
@ -138,6 +148,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
||||
"short", s,
|
||||
"integer", i,
|
||||
"long", l,
|
||||
"half_float", hf,
|
||||
"float", f,
|
||||
"double", d,
|
||||
"string", str)
|
||||
@ -147,7 +158,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
||||
|
||||
FieldStatsResponse response = client()
|
||||
.prepareFieldStats()
|
||||
.setFields("byte", "short", "integer", "long", "float", "double", "string").get();
|
||||
.setFields("byte", "short", "integer", "long", "half_float", "float", "double", "string").get();
|
||||
assertAllSuccessful(response);
|
||||
|
||||
for (FieldStats<?> stats : response.getAllFieldStats().values()) {
|
||||
@ -164,6 +175,8 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
|
||||
assertThat(response.getAllFieldStats().get("integer").getMaxValue(), equalTo(maxInt));
|
||||
assertThat(response.getAllFieldStats().get("long").getMinValue(), equalTo(minLong));
|
||||
assertThat(response.getAllFieldStats().get("long").getMaxValue(), equalTo(maxLong));
|
||||
assertThat(response.getAllFieldStats().get("half_float").getMinValue(), equalTo(minHalfFloat));
|
||||
assertThat(response.getAllFieldStats().get("half_float").getMaxValue(), equalTo(maxHalfFloat));
|
||||
assertThat(response.getAllFieldStats().get("float").getMinValue(), equalTo(minFloat));
|
||||
assertThat(response.getAllFieldStats().get("float").getMaxValue(), equalTo(maxFloat));
|
||||
assertThat(response.getAllFieldStats().get("double").getMinValue(), equalTo(minDouble));
|
||||
|
@ -108,6 +108,24 @@ public class FieldStatsTests extends ESSingleNodeTestCase {
|
||||
assertThat(result.getAllFieldStats().get(fieldName).getMinValueAsString(), equalTo(Double.toString(-1)));
|
||||
}
|
||||
|
||||
public void testHalfFloat() {
|
||||
String fieldName = "field";
|
||||
createIndex("test", Settings.EMPTY, "test", fieldName, "type=half_float");
|
||||
for (float value = -1; value <= 9; value++) {
|
||||
client().prepareIndex("test", "test").setSource(fieldName, value).get();
|
||||
}
|
||||
client().admin().indices().prepareRefresh().get();
|
||||
|
||||
FieldStatsResponse result = client().prepareFieldStats().setFields(fieldName).get();
|
||||
assertThat(result.getAllFieldStats().get(fieldName).getMaxDoc(), equalTo(11L));
|
||||
assertThat(result.getAllFieldStats().get(fieldName).getDocCount(), equalTo(11L));
|
||||
assertThat(result.getAllFieldStats().get(fieldName).getDensity(), equalTo(100));
|
||||
assertThat(result.getAllFieldStats().get(fieldName).getMinValue(), equalTo(-1d));
|
||||
assertThat(result.getAllFieldStats().get(fieldName).getMaxValue(), equalTo(9d));
|
||||
assertThat(result.getAllFieldStats().get(fieldName).getMinValueAsString(), equalTo(Float.toString(-1)));
|
||||
assertThat(result.getAllFieldStats().get(fieldName).getMaxValueAsString(), equalTo(Float.toString(9)));
|
||||
}
|
||||
|
||||
public void testFloat() {
|
||||
String fieldName = "field";
|
||||
createIndex("test", Settings.EMPTY, "test", fieldName, "type=float");
|
||||
|
@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.fielddata.plain;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.elasticsearch.index.fielddata.FieldData;
|
||||
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
|
||||
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class HalfFloatFielddataTests extends ESTestCase {
|
||||
|
||||
public void testSingleValued() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
// we need the default codec to check for singletons
|
||||
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null).setCodec(TestUtil.getDefaultCodec()));
|
||||
Document doc = new Document();
|
||||
for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 3f, false, true, false)) {
|
||||
doc.add(f);
|
||||
}
|
||||
w.addDocument(doc);
|
||||
final DirectoryReader dirReader = DirectoryReader.open(w);
|
||||
LeafReader reader = getOnlyLeafReader(dirReader);
|
||||
SortedNumericDoubleValues values = new SortedNumericDVIndexFieldData.SortedNumericHalfFloatFieldData(
|
||||
reader, "half_float").getDoubleValues();
|
||||
assertNotNull(FieldData.unwrapSingleton(values));
|
||||
values.setDocument(0);
|
||||
assertEquals(1, values.count());
|
||||
assertEquals(3f, values.valueAt(0), 0f);
|
||||
IOUtils.close(dirReader, w, dir);
|
||||
}
|
||||
|
||||
public void testMultiValued() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
|
||||
Document doc = new Document();
|
||||
for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 3f, false, true, false)) {
|
||||
doc.add(f);
|
||||
}
|
||||
for (IndexableField f : NumberFieldMapper.NumberType.HALF_FLOAT.createFields("half_float", 2f, false, true, false)) {
|
||||
doc.add(f);
|
||||
}
|
||||
w.addDocument(doc);
|
||||
final DirectoryReader dirReader = DirectoryReader.open(w);
|
||||
LeafReader reader = getOnlyLeafReader(dirReader);
|
||||
SortedNumericDoubleValues values = new SortedNumericDVIndexFieldData.SortedNumericHalfFloatFieldData(
|
||||
reader, "half_float").getDoubleValues();
|
||||
assertNull(FieldData.unwrapSingleton(values));
|
||||
values.setDocument(0);
|
||||
assertEquals(2, values.count());
|
||||
assertEquals(2f, values.valueAt(0), 0f);
|
||||
assertEquals(3f, values.valueAt(1), 0f);
|
||||
IOUtils.close(dirReader, w, dir);
|
||||
}
|
||||
}
|
@ -21,8 +21,18 @@ package org.elasticsearch.index.mapper.core;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FloatPoint;
|
||||
import org.apache.lucene.document.HalfFloatPoint;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.index.mapper.FieldTypeTestCase;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType.Relation;
|
||||
@ -81,6 +91,7 @@ public class NumberFieldTypeTests extends FieldTypeTestCase {
|
||||
assertEquals((short) 3, NumberType.SHORT.parse(3d));
|
||||
assertEquals(3, NumberType.INTEGER.parse(3d));
|
||||
assertEquals(3L, NumberType.LONG.parse(3d));
|
||||
assertEquals(3f, NumberType.HALF_FLOAT.parse(3d));
|
||||
assertEquals(3f, NumberType.FLOAT.parse(3d));
|
||||
assertEquals(3d, NumberType.DOUBLE.parse(3d));
|
||||
|
||||
@ -103,7 +114,39 @@ public class NumberFieldTypeTests extends FieldTypeTestCase {
|
||||
assertEquals("Value [2147483648] is out of range for an integer", e.getMessage());
|
||||
e = expectThrows(IllegalArgumentException.class, () -> NumberType.LONG.parse(10000000000000000000d));
|
||||
assertEquals("Value [1.0E19] is out of range for a long", e.getMessage());
|
||||
assertEquals(1.1f, NumberType.FLOAT.parse(1.1)); // accuracy loss is expected
|
||||
assertEquals(1.1f, NumberType.HALF_FLOAT.parse(1.1));
|
||||
assertEquals(1.1f, NumberType.FLOAT.parse(1.1));
|
||||
assertEquals(1.1d, NumberType.DOUBLE.parse(1.1));
|
||||
}
|
||||
|
||||
public void testHalfFloatRange() throws IOException {
|
||||
// make sure the accuracy loss of half floats only occurs at index time
|
||||
// this test checks that searching half floats yields the same results as
|
||||
// searching floats that are rounded to the closest half float
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
|
||||
final int numDocs = 10000;
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
Document doc = new Document();
|
||||
float value = (randomFloat() * 2 - 1) * 70000;
|
||||
float rounded = HalfFloatPoint.sortableShortToHalfFloat(HalfFloatPoint.halfFloatToSortableShort(value));
|
||||
doc.add(new HalfFloatPoint("half_float", value));
|
||||
doc.add(new FloatPoint("float", rounded));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
final DirectoryReader reader = DirectoryReader.open(w);
|
||||
w.close();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
final int numQueries = 1000;
|
||||
for (int i = 0; i < numQueries; ++i) {
|
||||
float l = (randomFloat() * 2 - 1) * 70000;
|
||||
float u = (randomFloat() * 2 - 1) * 70000;
|
||||
boolean includeLower = randomBoolean();
|
||||
boolean includeUpper = randomBoolean();
|
||||
Query floatQ = NumberFieldMapper.NumberType.FLOAT.rangeQuery("float", l, u, includeLower, includeUpper);
|
||||
Query halfFloatQ = NumberFieldMapper.NumberType.HALF_FLOAT.rangeQuery("half_float", l, u, includeLower, includeUpper);
|
||||
assertEquals(searcher.count(floatQ), searcher.count(halfFloatQ));
|
||||
}
|
||||
IOUtils.close(reader, dir);
|
||||
}
|
||||
}
|
||||
|
@ -4,12 +4,13 @@
|
||||
The following numeric types are supported:
|
||||
|
||||
[horizontal]
|
||||
`long`:: A signed 64-bit integer with a minimum value of +-2^63^+ and a maximum value of +2^63^-1+.
|
||||
`integer`:: A signed 32-bit integer with a minimum value of +-2^31^+ and a maximum value of +2^31^-1+.
|
||||
`short`:: A signed 16-bit integer with a minimum value of +-32,768+ and a maximum value of +32,767+.
|
||||
`byte`:: A signed 8-bit integer with a minimum value of +-128+ and a maximum value of +127+.
|
||||
`double`:: A double-precision 64-bit IEEE 754 floating point.
|
||||
`float`:: A single-precision 32-bit IEEE 754 floating point.
|
||||
`long`:: A signed 64-bit integer with a minimum value of +-2^63^+ and a maximum value of +2^63^-1+.
|
||||
`integer`:: A signed 32-bit integer with a minimum value of +-2^31^+ and a maximum value of +2^31^-1+.
|
||||
`short`:: A signed 16-bit integer with a minimum value of +-32,768+ and a maximum value of +32,767+.
|
||||
`byte`:: A signed 8-bit integer with a minimum value of +-128+ and a maximum value of +127+.
|
||||
`double`:: A double-precision 64-bit IEEE 754 floating point.
|
||||
`float`:: A single-precision 32-bit IEEE 754 floating point.
|
||||
`half_float`:: A half-precision 16-bit IEEE 754 floating point.
|
||||
|
||||
Below is an example of configuring a mapping with numeric fields:
|
||||
|
||||
@ -33,6 +34,34 @@ PUT my_index
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
==== Which type should I use?
|
||||
|
||||
As far as integer types (`byte`, `short`, `integer` and `long`) are concerned,
|
||||
you should pick the smallest type which is enough for your use-case. This will
|
||||
help indexing and searching be more efficient. Note however that given that
|
||||
storage is optimized based on the actual values that are stored, picking one
|
||||
type over another one will have no impact on storage requirements.
|
||||
|
||||
For floating-point types, picking the smallest type that is enough for the
|
||||
use-case will still help indexing and searching be more efficient. However,
|
||||
given that floating-point data is hard to compress, it might also have a
|
||||
significant impact on storage requirements. Here is a table that compares the
|
||||
3 floating-point types that are available in order to help make a decision.
|
||||
|
||||
[cols="<,<,<,<",options="header",]
|
||||
|=======================================================================
|
||||
|Type |Minimum positive value |Maximum value |Significant bits / digits
|
||||
|`double`|+2^-1074^+ |+(2-2^-52^)·2^1023^+ |+53+ / +15.95+
|
||||
|`float`|+2^-149^+ |+(2-2^-23^)·2^127^+ |+24+ / +7.22+
|
||||
|`half_float`|+2^-24^+ |+65504+ |+11+ / +3.31+
|
||||
|=======================================================================
|
||||
|
||||
When possible, it is often more efficient to store floating-point data into an
|
||||
integer using a scaling factor. For instance, it is more efficient to store
|
||||
percentages as integers between 0 and 100 than as floating-point numbers between 0
|
||||
and 1. Another example would be prices: it will be more efficient to store prices
|
||||
as a number of cents, which is an integer, than as a floating-point number.
|
||||
|
||||
[[number-params]]
|
||||
==== Parameters for numeric fields
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user