Change numeric data types to use SORTED_NUMERIC docvalues type
instead of a custom encoding in BINARY. In low-level benchmarks this is 2x to 5x faster, and it's also optimized for the common case where fields actually only contain at most one value for each document. Additionally, SORTED_NUMERIC doesn't lose values if they appear more than once, so mathematical computations such as averages are correct. Closes #6967.
This commit is contained in:
parent
ff2903d2c6
commit
66825ac851
|
@ -22,6 +22,7 @@ package org.elasticsearch.index.fielddata.plain;
|
||||||
import com.google.common.collect.ImmutableSet;
|
import com.google.common.collect.ImmutableSet;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||||
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.common.logging.ESLogger;
|
import org.elasticsearch.common.logging.ESLogger;
|
||||||
import org.elasticsearch.common.logging.Loggers;
|
import org.elasticsearch.common.logging.Loggers;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -107,7 +108,12 @@ public abstract class DocValuesIndexFieldData {
|
||||||
assert !numericType.isFloatingPoint();
|
assert !numericType.isFloatingPoint();
|
||||||
return new NumericDVIndexFieldData(index, fieldNames, mapper.fieldDataType());
|
return new NumericDVIndexFieldData(index, fieldNames, mapper.fieldDataType());
|
||||||
} else if (numericType != null) {
|
} else if (numericType != null) {
|
||||||
return new BinaryDVNumericIndexFieldData(index, fieldNames, numericType, mapper.fieldDataType());
|
if (Version.indexCreated(indexSettings).onOrAfter(Version.V_1_4_0)) {
|
||||||
|
return new SortedNumericDVIndexFieldData(index, fieldNames, numericType, mapper.fieldDataType());
|
||||||
|
} else {
|
||||||
|
// prior to ES 1.4: multi-valued numerics were boxed inside a byte[] as BINARY
|
||||||
|
return new BinaryDVNumericIndexFieldData(index, fieldNames, numericType, mapper.fieldDataType());
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
return new SortedSetDVOrdinalsIndexFieldData(index, cache, indexSettings, fieldNames, breakerService, mapper.fieldDataType());
|
return new SortedSetDVOrdinalsIndexFieldData(index, cache, indexSettings, fieldNames, breakerService, mapper.fieldDataType());
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,292 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.fielddata.plain;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import org.apache.lucene.index.*;
|
||||||
|
import org.apache.lucene.util.NumericUtils;
|
||||||
|
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.fielddata.*;
|
||||||
|
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
|
||||||
|
import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
|
||||||
|
import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
|
||||||
|
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
|
||||||
|
import org.elasticsearch.index.mapper.FieldMapper.Names;
|
||||||
|
import org.elasticsearch.search.MultiValueMode;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* FieldData backed by {@link AtomicReader#getSortedNumericDocValues(String)}
|
||||||
|
* @see FieldInfo.DocValuesType#SORTED_NUMERIC
|
||||||
|
*/
|
||||||
|
public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData implements IndexNumericFieldData {
|
||||||
|
private final NumericType numericType;
|
||||||
|
|
||||||
|
public SortedNumericDVIndexFieldData(Index index, Names fieldNames, NumericType numericType, FieldDataType fieldDataType) {
|
||||||
|
super(index, fieldNames, fieldDataType);
|
||||||
|
Preconditions.checkArgument(numericType != null, "numericType must be non-null");
|
||||||
|
this.numericType = numericType;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue, MultiValueMode sortMode, Nested nested) {
|
||||||
|
switch (numericType) {
|
||||||
|
case FLOAT:
|
||||||
|
return new FloatValuesComparatorSource(this, missingValue, sortMode, nested);
|
||||||
|
case DOUBLE:
|
||||||
|
return new DoubleValuesComparatorSource(this, missingValue, sortMode, nested);
|
||||||
|
default:
|
||||||
|
assert !numericType.isFloatingPoint();
|
||||||
|
return new LongValuesComparatorSource(this, missingValue, sortMode, nested);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public NumericType getNumericType() {
|
||||||
|
return numericType;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
|
||||||
|
return load(context);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AtomicNumericFieldData load(AtomicReaderContext context) {
|
||||||
|
final AtomicReader reader = context.reader();
|
||||||
|
final String field = fieldNames.indexName();
|
||||||
|
|
||||||
|
switch (numericType) {
|
||||||
|
case FLOAT:
|
||||||
|
return new SortedNumericFloatFieldData(reader, field);
|
||||||
|
case DOUBLE:
|
||||||
|
return new SortedNumericDoubleFieldData(reader, field);
|
||||||
|
default:
|
||||||
|
return new SortedNumericLongFieldData(reader, field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* FieldData implementation for integral types.
|
||||||
|
* <p>
|
||||||
|
* Order of values within a document is consistent with
|
||||||
|
* {@link Long#compareTo(Long)}.
|
||||||
|
* <p>
|
||||||
|
* Although the API is multi-valued, most codecs in Lucene specialize
|
||||||
|
* for the case where documents have at most one value. In this case
|
||||||
|
* {@link DocValues#unwrapSingleton(SortedNumericDocValues)} will return
|
||||||
|
* the underlying single-valued NumericDocValues representation, and
|
||||||
|
* {@link DocValues#unwrapSingletonBits(SortedNumericDocValues)} will return
|
||||||
|
* a Bits matching documents that have a real value (as opposed to missing).
|
||||||
|
*/
|
||||||
|
static final class SortedNumericLongFieldData extends AtomicLongFieldData {
|
||||||
|
final AtomicReader reader;
|
||||||
|
final String field;
|
||||||
|
|
||||||
|
SortedNumericLongFieldData(AtomicReader reader, String field) {
|
||||||
|
super(-1L);
|
||||||
|
this.reader = reader;
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedNumericDocValues getLongValues() {
|
||||||
|
try {
|
||||||
|
return DocValues.getSortedNumeric(reader, field);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* FieldData implementation for 32-bit float values.
|
||||||
|
* <p>
|
||||||
|
* Order of values within a document is consistent with
|
||||||
|
* {@link Float#compareTo(Float)}, hence the following reversible
|
||||||
|
* transformation is applied at both index and search:
|
||||||
|
* <pre>{@code
|
||||||
|
* bits ^ (bits >> 31) & 0x7fffffff
|
||||||
|
* }</pre>
|
||||||
|
* <p>
|
||||||
|
* Although the API is multi-valued, most codecs in Lucene specialize
|
||||||
|
* for the case where documents have at most one value. In this case
|
||||||
|
* {@link FieldData#unwrapSingleton(SortedNumericDoubleValues)} will return
|
||||||
|
* the underlying single-valued NumericDoubleValues representation, and
|
||||||
|
* {@link FieldData#unwrapSingletonBits(SortedNumericDoubleValues)} will return
|
||||||
|
* a Bits matching documents that have a real value (as opposed to missing).
|
||||||
|
*/
|
||||||
|
static final class SortedNumericFloatFieldData extends AtomicDoubleFieldData {
|
||||||
|
final AtomicReader reader;
|
||||||
|
final String field;
|
||||||
|
|
||||||
|
SortedNumericFloatFieldData(AtomicReader reader, String field) {
|
||||||
|
super(-1L);
|
||||||
|
this.reader = reader;
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedNumericDoubleValues getDoubleValues() {
|
||||||
|
try {
|
||||||
|
SortedNumericDocValues raw = DocValues.getSortedNumeric(reader, field);
|
||||||
|
|
||||||
|
NumericDocValues single = DocValues.unwrapSingleton(raw);
|
||||||
|
if (single != null) {
|
||||||
|
return FieldData.singleton(new SingleFloatValues(single), DocValues.unwrapSingletonBits(raw));
|
||||||
|
} else {
|
||||||
|
return new MultiFloatValues(raw);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wraps a NumericDocValues and exposes a single 32-bit float per document.
|
||||||
|
*/
|
||||||
|
static final class SingleFloatValues extends NumericDoubleValues {
|
||||||
|
final NumericDocValues in;
|
||||||
|
|
||||||
|
SingleFloatValues(NumericDocValues in) {
|
||||||
|
this.in = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double get(int docID) {
|
||||||
|
return NumericUtils.sortableIntToFloat((int) in.get(docID));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wraps a SortedNumericDocValues and exposes multiple 32-bit floats per document.
|
||||||
|
*/
|
||||||
|
static final class MultiFloatValues extends SortedNumericDoubleValues {
|
||||||
|
final SortedNumericDocValues in;
|
||||||
|
|
||||||
|
MultiFloatValues(SortedNumericDocValues in) {
|
||||||
|
this.in = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setDocument(int doc) {
|
||||||
|
in.setDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double valueAt(int index) {
|
||||||
|
return NumericUtils.sortableIntToFloat((int) in.valueAt(index));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int count() {
|
||||||
|
return in.count();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* FieldData implementation for 64-bit double values.
|
||||||
|
* <p>
|
||||||
|
* Order of values within a document is consistent with
|
||||||
|
* {@link Double#compareTo(Double)}, hence the following reversible
|
||||||
|
* transformation is applied at both index and search:
|
||||||
|
* <pre>{@code
|
||||||
|
* bits ^ (bits >> 63) & 0x7fffffffffffffffL
|
||||||
|
* }</pre>
|
||||||
|
* <p>
|
||||||
|
* Although the API is multi-valued, most codecs in Lucene specialize
|
||||||
|
* for the case where documents have at most one value. In this case
|
||||||
|
* {@link FieldData#unwrapSingleton(SortedNumericDoubleValues)} will return
|
||||||
|
* the underlying single-valued NumericDoubleValues representation, and
|
||||||
|
* {@link FieldData#unwrapSingletonBits(SortedNumericDoubleValues)} will return
|
||||||
|
* a Bits matching documents that have a real value (as opposed to missing).
|
||||||
|
*/
|
||||||
|
static final class SortedNumericDoubleFieldData extends AtomicDoubleFieldData {
|
||||||
|
final AtomicReader reader;
|
||||||
|
final String field;
|
||||||
|
|
||||||
|
SortedNumericDoubleFieldData(AtomicReader reader, String field) {
|
||||||
|
super(-1L);
|
||||||
|
this.reader = reader;
|
||||||
|
this.field = field;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedNumericDoubleValues getDoubleValues() {
|
||||||
|
try {
|
||||||
|
SortedNumericDocValues raw = DocValues.getSortedNumeric(reader, field);
|
||||||
|
|
||||||
|
NumericDocValues single = DocValues.unwrapSingleton(raw);
|
||||||
|
if (single != null) {
|
||||||
|
return FieldData.singleton(new SingleDoubleValues(single), DocValues.unwrapSingletonBits(raw));
|
||||||
|
} else {
|
||||||
|
return new MultiDoubleValues(raw);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wraps a NumericDocValues and exposes a single 64-bit double per document.
|
||||||
|
*/
|
||||||
|
static final class SingleDoubleValues extends NumericDoubleValues {
|
||||||
|
final NumericDocValues in;
|
||||||
|
|
||||||
|
SingleDoubleValues(NumericDocValues in) {
|
||||||
|
this.in = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double get(int docID) {
|
||||||
|
return NumericUtils.sortableLongToDouble(in.get(docID));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wraps a SortedNumericDocValues and exposes multiple 64-bit doubles per document.
|
||||||
|
*/
|
||||||
|
static final class MultiDoubleValues extends SortedNumericDoubleValues {
|
||||||
|
final SortedNumericDocValues in;
|
||||||
|
|
||||||
|
MultiDoubleValues(SortedNumericDocValues in) {
|
||||||
|
this.in = in;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setDocument(int doc) {
|
||||||
|
in.setDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double valueAt(int index) {
|
||||||
|
return NumericUtils.sortableLongToDouble(in.valueAt(index));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int count() {
|
||||||
|
return in.count();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -320,7 +320,7 @@ public class ByteFieldMapper extends NumberFieldMapper<Byte> {
|
||||||
fields.add(field);
|
fields.add(field);
|
||||||
}
|
}
|
||||||
if (hasDocValues()) {
|
if (hasDocValues()) {
|
||||||
addDocValue(context, value);
|
addDocValue(context, fields, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,7 +41,6 @@ import org.elasticsearch.common.unit.Fuzziness;
|
||||||
import org.elasticsearch.common.util.LocaleUtils;
|
import org.elasticsearch.common.util.LocaleUtils;
|
||||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
import org.elasticsearch.common.xcontent.XContentParser;
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
|
||||||
import org.elasticsearch.index.analysis.NumericDateAnalyzer;
|
import org.elasticsearch.index.analysis.NumericDateAnalyzer;
|
||||||
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
|
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
|
||||||
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
|
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
|
||||||
|
@ -514,7 +513,7 @@ public class DateFieldMapper extends NumberFieldMapper<Long> {
|
||||||
fields.add(field);
|
fields.add(field);
|
||||||
}
|
}
|
||||||
if (hasDocValues()) {
|
if (hasDocValues()) {
|
||||||
addDocValue(context, value);
|
addDocValue(context, fields, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -316,12 +316,16 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
|
||||||
fields.add(field);
|
fields.add(field);
|
||||||
}
|
}
|
||||||
if (hasDocValues()) {
|
if (hasDocValues()) {
|
||||||
CustomDoubleNumericDocValuesField field = (CustomDoubleNumericDocValuesField) context.doc().getByKey(names().indexName());
|
if (useSortedNumericDocValues) {
|
||||||
if (field != null) {
|
addDocValue(context, fields, NumericUtils.doubleToSortableLong(value));
|
||||||
field.add(value);
|
|
||||||
} else {
|
} else {
|
||||||
field = new CustomDoubleNumericDocValuesField(names().indexName(), value);
|
CustomDoubleNumericDocValuesField field = (CustomDoubleNumericDocValuesField) context.doc().getByKey(names().indexName());
|
||||||
context.doc().addWithKey(names().indexName(), field);
|
if (field != null) {
|
||||||
|
field.add(value);
|
||||||
|
} else {
|
||||||
|
field = new CustomDoubleNumericDocValuesField(names().indexName(), value);
|
||||||
|
context.doc().addWithKey(names().indexName(), field);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -321,12 +321,16 @@ public class FloatFieldMapper extends NumberFieldMapper<Float> {
|
||||||
fields.add(field);
|
fields.add(field);
|
||||||
}
|
}
|
||||||
if (hasDocValues()) {
|
if (hasDocValues()) {
|
||||||
CustomFloatNumericDocValuesField field = (CustomFloatNumericDocValuesField) context.doc().getByKey(names().indexName());
|
if (useSortedNumericDocValues) {
|
||||||
if (field != null) {
|
addDocValue(context, fields, NumericUtils.floatToSortableInt(value));
|
||||||
field.add(value);
|
|
||||||
} else {
|
} else {
|
||||||
field = new CustomFloatNumericDocValuesField(names().indexName(), value);
|
CustomFloatNumericDocValuesField field = (CustomFloatNumericDocValuesField) context.doc().getByKey(names().indexName());
|
||||||
context.doc().addWithKey(names().indexName(), field);
|
if (field != null) {
|
||||||
|
field.add(value);
|
||||||
|
} else {
|
||||||
|
field = new CustomFloatNumericDocValuesField(names().indexName(), value);
|
||||||
|
context.doc().addWithKey(names().indexName(), field);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -319,7 +319,7 @@ public class IntegerFieldMapper extends NumberFieldMapper<Integer> {
|
||||||
fields.add(field);
|
fields.add(field);
|
||||||
}
|
}
|
||||||
if (hasDocValues()) {
|
if (hasDocValues()) {
|
||||||
addDocValue(context, value);
|
addDocValue(context, fields, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -305,7 +305,7 @@ public class LongFieldMapper extends NumberFieldMapper<Long> {
|
||||||
fields.add(field);
|
fields.add(field);
|
||||||
}
|
}
|
||||||
if (hasDocValues()) {
|
if (hasDocValues()) {
|
||||||
addDocValue(context, value);
|
addDocValue(context, fields, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.analysis.NumericTokenStream;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
|
import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexableField;
|
import org.apache.lucene.index.IndexableField;
|
||||||
|
@ -35,6 +36,7 @@ import org.apache.lucene.search.Filter;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.common.Explicit;
|
import org.elasticsearch.common.Explicit;
|
||||||
import org.elasticsearch.common.Nullable;
|
import org.elasticsearch.common.Nullable;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -138,6 +140,14 @@ public abstract class NumberFieldMapper<T extends Number> extends AbstractFieldM
|
||||||
|
|
||||||
protected Explicit<Boolean> coerce;
|
protected Explicit<Boolean> coerce;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True if index version is 1.4+
|
||||||
|
* <p>
|
||||||
|
* In this case numerics are encoded with SORTED_NUMERIC docvalues,
|
||||||
|
* otherwise for older indexes we must continue to write BINARY (for now)
|
||||||
|
*/
|
||||||
|
protected final boolean useSortedNumericDocValues;
|
||||||
|
|
||||||
private ThreadLocal<NumericTokenStream> tokenStream = new ThreadLocal<NumericTokenStream>() {
|
private ThreadLocal<NumericTokenStream> tokenStream = new ThreadLocal<NumericTokenStream>() {
|
||||||
@Override
|
@Override
|
||||||
protected NumericTokenStream initialValue() {
|
protected NumericTokenStream initialValue() {
|
||||||
|
@ -189,6 +199,8 @@ public abstract class NumberFieldMapper<T extends Number> extends AbstractFieldM
|
||||||
}
|
}
|
||||||
this.ignoreMalformed = ignoreMalformed;
|
this.ignoreMalformed = ignoreMalformed;
|
||||||
this.coerce = coerce;
|
this.coerce = coerce;
|
||||||
|
Version v = indexSettings == null ? Version.CURRENT : Version.indexCreated(indexSettings);
|
||||||
|
this.useSortedNumericDocValues = v.onOrAfter(Version.V_1_4_0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -234,13 +246,17 @@ public abstract class NumberFieldMapper<T extends Number> extends AbstractFieldM
|
||||||
|
|
||||||
protected abstract void innerParseCreateField(ParseContext context, List<Field> fields) throws IOException;
|
protected abstract void innerParseCreateField(ParseContext context, List<Field> fields) throws IOException;
|
||||||
|
|
||||||
protected final void addDocValue(ParseContext context, long value) {
|
protected final void addDocValue(ParseContext context, List<Field> fields, long value) {
|
||||||
CustomLongNumericDocValuesField field = (CustomLongNumericDocValuesField) context.doc().getByKey(names().indexName());
|
if (useSortedNumericDocValues) {
|
||||||
if (field != null) {
|
fields.add(new SortedNumericDocValuesField(names().indexName(), value));
|
||||||
field.add(value);
|
|
||||||
} else {
|
} else {
|
||||||
field = new CustomLongNumericDocValuesField(names().indexName(), value);
|
CustomLongNumericDocValuesField field = (CustomLongNumericDocValuesField) context.doc().getByKey(names().indexName());
|
||||||
context.doc().addWithKey(names().indexName(), field);
|
if (field != null) {
|
||||||
|
field.add(value);
|
||||||
|
} else {
|
||||||
|
field = new CustomLongNumericDocValuesField(names().indexName(), value);
|
||||||
|
context.doc().addWithKey(names().indexName(), field);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -321,7 +321,7 @@ public class ShortFieldMapper extends NumberFieldMapper<Short> {
|
||||||
fields.add(field);
|
fields.add(field);
|
||||||
}
|
}
|
||||||
if (hasDocValues()) {
|
if (hasDocValues()) {
|
||||||
addDocValue(context, value);
|
addDocValue(context, fields, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -298,7 +298,7 @@ public class IpFieldMapper extends NumberFieldMapper<Long> {
|
||||||
fields.add(field);
|
fields.add(field);
|
||||||
}
|
}
|
||||||
if (hasDocValues()) {
|
if (hasDocValues()) {
|
||||||
addDocValue(context, value);
|
addDocValue(context, fields, value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ package org.elasticsearch.index.fielddata;
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.index.*;
|
import org.apache.lucene.index.*;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||||
import org.elasticsearch.common.lucene.Lucene;
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -34,6 +35,8 @@ import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
|
||||||
// we might wanna cut this over to LuceneTestCase
|
// we might wanna cut this over to LuceneTestCase
|
||||||
|
@SuppressCodecs({"Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45", "Lucene46"})
|
||||||
|
// avoid codecs that do not support SortedNumerics, SortedSet, etc
|
||||||
public abstract class AbstractFieldDataTests extends ElasticsearchSingleNodeTest {
|
public abstract class AbstractFieldDataTests extends ElasticsearchSingleNodeTest {
|
||||||
|
|
||||||
protected IndexService indexService;
|
protected IndexService indexService;
|
||||||
|
|
|
@ -101,9 +101,7 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
|
||||||
typeMap.put(new FieldDataType("long", ImmutableSettings.builder().put("format", "doc_values")), Type.Long);
|
typeMap.put(new FieldDataType("long", ImmutableSettings.builder().put("format", "doc_values")), Type.Long);
|
||||||
typeMap.put(new FieldDataType("double", ImmutableSettings.builder().put("format", "doc_values")), Type.Double);
|
typeMap.put(new FieldDataType("double", ImmutableSettings.builder().put("format", "doc_values")), Type.Double);
|
||||||
typeMap.put(new FieldDataType("float", ImmutableSettings.builder().put("format", "doc_values")), Type.Float);
|
typeMap.put(new FieldDataType("float", ImmutableSettings.builder().put("format", "doc_values")), Type.Float);
|
||||||
if (LuceneTestCase.defaultCodecSupportsSortedSet()) {
|
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes);
|
||||||
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes);
|
|
||||||
}
|
|
||||||
ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<>(typeMap.entrySet());
|
ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<>(typeMap.entrySet());
|
||||||
Preprocessor pre = new ToDoublePreprocessor();
|
Preprocessor pre = new ToDoublePreprocessor();
|
||||||
while (!list.isEmpty()) {
|
while (!list.isEmpty()) {
|
||||||
|
@ -149,13 +147,17 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
|
||||||
final int maxNumValues = randomBoolean() ? 1 : randomIntBetween(2, 40);
|
final int maxNumValues = randomBoolean() ? 1 : randomIntBetween(2, 40);
|
||||||
byte[] values = new byte[maxNumValues];
|
byte[] values = new byte[maxNumValues];
|
||||||
for (int i = 0; i < atLeast; i++) {
|
for (int i = 0; i < atLeast; i++) {
|
||||||
final int numValues = randomInt(maxNumValues);
|
int numValues = randomInt(maxNumValues);
|
||||||
|
// FD loses values if they are duplicated, so we must deduplicate for this test
|
||||||
|
Set<Byte> vals = new HashSet<Byte>();
|
||||||
for (int j = 0; j < numValues; ++j) {
|
for (int j = 0; j < numValues; ++j) {
|
||||||
if (randomBoolean()) {
|
vals.add(randomByte());
|
||||||
values[j] = 1; // test deduplication
|
}
|
||||||
} else {
|
|
||||||
values[j] = randomByte();
|
numValues = vals.size();
|
||||||
}
|
int upto = 0;
|
||||||
|
for (Byte bb : vals) {
|
||||||
|
values[upto++] = bb.byteValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
XContentBuilder doc = XContentFactory.jsonBuilder().startObject();
|
XContentBuilder doc = XContentFactory.jsonBuilder().startObject();
|
||||||
|
@ -227,15 +229,22 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
|
||||||
final int maxNumValues = randomBoolean() ? 1 : randomIntBetween(2, 40);
|
final int maxNumValues = randomBoolean() ? 1 : randomIntBetween(2, 40);
|
||||||
float[] values = new float[maxNumValues];
|
float[] values = new float[maxNumValues];
|
||||||
for (int i = 0; i < atLeast; i++) {
|
for (int i = 0; i < atLeast; i++) {
|
||||||
final int numValues = randomInt(maxNumValues);
|
int numValues = randomInt(maxNumValues);
|
||||||
float def = randomBoolean() ? randomFloat() : Float.NaN;
|
float def = randomBoolean() ? randomFloat() : Float.NaN;
|
||||||
|
// FD loses values if they are duplicated, so we must deduplicate for this test
|
||||||
|
Set<Float> vals = new HashSet<Float>();
|
||||||
for (int j = 0; j < numValues; ++j) {
|
for (int j = 0; j < numValues; ++j) {
|
||||||
if (randomBoolean()) {
|
if (randomBoolean()) {
|
||||||
values[j] = def;
|
vals.add(def);
|
||||||
} else {
|
} else {
|
||||||
values[j] = randomFloat();
|
vals.add(randomFloat());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
numValues = vals.size();
|
||||||
|
int upto = 0;
|
||||||
|
for (Float f : vals) {
|
||||||
|
values[upto++] = f.floatValue();
|
||||||
|
}
|
||||||
|
|
||||||
XContentBuilder doc = XContentFactory.jsonBuilder().startObject().startArray("float");
|
XContentBuilder doc = XContentFactory.jsonBuilder().startObject().startArray("float");
|
||||||
for (int j = 0; j < numValues; ++j) {
|
for (int j = 0; j < numValues; ++j) {
|
||||||
|
@ -302,15 +311,11 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
|
||||||
for (int j : numbers) {
|
for (int j : numbers) {
|
||||||
final String s = English.longToEnglish(j);
|
final String s = English.longToEnglish(j);
|
||||||
d.add(new StringField("bytes", s, Field.Store.NO));
|
d.add(new StringField("bytes", s, Field.Store.NO));
|
||||||
if (LuceneTestCase.defaultCodecSupportsSortedSet()) {
|
d.add(new SortedSetDocValuesField("bytes", new BytesRef(s)));
|
||||||
d.add(new SortedSetDocValuesField("bytes", new BytesRef(s)));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (random.nextInt(10) == 0) {
|
if (random.nextInt(10) == 0) {
|
||||||
d.add(new StringField("bytes", "", Field.Store.NO));
|
d.add(new StringField("bytes", "", Field.Store.NO));
|
||||||
if (LuceneTestCase.defaultCodecSupportsSortedSet()) {
|
d.add(new SortedSetDocValuesField("bytes", new BytesRef()));
|
||||||
d.add(new SortedSetDocValuesField("bytes", new BytesRef()));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
writer.addDocument(d);
|
writer.addDocument(d);
|
||||||
|
@ -322,9 +327,7 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
|
||||||
Map<FieldDataType, Type> typeMap = new HashMap<>();
|
Map<FieldDataType, Type> typeMap = new HashMap<>();
|
||||||
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")), Type.Bytes);
|
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")), Type.Bytes);
|
||||||
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")), Type.Bytes);
|
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")), Type.Bytes);
|
||||||
if (LuceneTestCase.defaultCodecSupportsSortedSet()) {
|
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes);
|
||||||
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes);
|
|
||||||
}
|
|
||||||
// TODO add filters
|
// TODO add filters
|
||||||
ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<>(typeMap.entrySet());
|
ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<>(typeMap.entrySet());
|
||||||
Preprocessor pre = new Preprocessor();
|
Preprocessor pre = new Preprocessor();
|
||||||
|
@ -371,9 +374,7 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
|
||||||
for (int j = 0; j < numVals; ++j) {
|
for (int j = 0; j < numVals; ++j) {
|
||||||
final String value = RandomPicks.randomFrom(random, Arrays.asList(values));
|
final String value = RandomPicks.randomFrom(random, Arrays.asList(values));
|
||||||
d.add(new StringField("string", value, Field.Store.NO));
|
d.add(new StringField("string", value, Field.Store.NO));
|
||||||
if (LuceneTestCase.defaultCodecSupportsSortedSet()) {
|
d.add(new SortedSetDocValuesField("bytes", new BytesRef(value)));
|
||||||
d.add(new SortedSetDocValuesField("bytes", new BytesRef(value)));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
writer.addDocument(d);
|
writer.addDocument(d);
|
||||||
if (randomInt(10) == 0) {
|
if (randomInt(10) == 0) {
|
||||||
|
@ -385,9 +386,7 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
|
||||||
Map<FieldDataType, Type> typeMap = new HashMap<FieldDataType, DuelFieldDataTests.Type>();
|
Map<FieldDataType, Type> typeMap = new HashMap<FieldDataType, DuelFieldDataTests.Type>();
|
||||||
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")), Type.Bytes);
|
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")), Type.Bytes);
|
||||||
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")), Type.Bytes);
|
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")), Type.Bytes);
|
||||||
if (LuceneTestCase.defaultCodecSupportsSortedSet()) {
|
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes);
|
||||||
typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (Map.Entry<FieldDataType, Type> entry : typeMap.entrySet()) {
|
for (Map.Entry<FieldDataType, Type> entry : typeMap.entrySet()) {
|
||||||
ifdService.clear();
|
ifdService.clear();
|
||||||
|
|
|
@ -68,7 +68,7 @@ public class IndexFieldDataServiceTests extends ElasticsearchSingleNodeTest {
|
||||||
ifdService.clear();
|
ifdService.clear();
|
||||||
fd = ifdService.getForField(mapper);
|
fd = ifdService.getForField(mapper);
|
||||||
if (docValues) {
|
if (docValues) {
|
||||||
assertTrue(fd instanceof BinaryDVNumericIndexFieldData);
|
assertTrue(fd instanceof SortedNumericDVIndexFieldData);
|
||||||
} else {
|
} else {
|
||||||
assertTrue(fd instanceof PackedArrayIndexFieldData);
|
assertTrue(fd instanceof PackedArrayIndexFieldData);
|
||||||
}
|
}
|
||||||
|
@ -78,7 +78,7 @@ public class IndexFieldDataServiceTests extends ElasticsearchSingleNodeTest {
|
||||||
ifdService.clear();
|
ifdService.clear();
|
||||||
fd = ifdService.getForField(floatMapper);
|
fd = ifdService.getForField(floatMapper);
|
||||||
if (docValues) {
|
if (docValues) {
|
||||||
assertTrue(fd instanceof BinaryDVNumericIndexFieldData);
|
assertTrue(fd instanceof SortedNumericDVIndexFieldData);
|
||||||
} else {
|
} else {
|
||||||
assertTrue(fd instanceof FloatArrayIndexFieldData);
|
assertTrue(fd instanceof FloatArrayIndexFieldData);
|
||||||
}
|
}
|
||||||
|
@ -87,7 +87,7 @@ public class IndexFieldDataServiceTests extends ElasticsearchSingleNodeTest {
|
||||||
ifdService.clear();
|
ifdService.clear();
|
||||||
fd = ifdService.getForField(doubleMapper);
|
fd = ifdService.getForField(doubleMapper);
|
||||||
if (docValues) {
|
if (docValues) {
|
||||||
assertTrue(fd instanceof BinaryDVNumericIndexFieldData);
|
assertTrue(fd instanceof SortedNumericDVIndexFieldData);
|
||||||
} else {
|
} else {
|
||||||
assertTrue(fd instanceof DoubleArrayIndexFieldData);
|
assertTrue(fd instanceof DoubleArrayIndexFieldData);
|
||||||
}
|
}
|
||||||
|
|
|
@ -279,8 +279,8 @@ public class SimpleNumericTests extends ElasticsearchSingleNodeTest {
|
||||||
.endObject()
|
.endObject()
|
||||||
.bytes());
|
.bytes());
|
||||||
final Document doc = parsedDoc.rootDoc();
|
final Document doc = parsedDoc.rootDoc();
|
||||||
assertEquals(DocValuesType.BINARY, SimpleStringMappingTests.docValuesType(doc, "int"));
|
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "int"));
|
||||||
assertEquals(DocValuesType.BINARY, SimpleStringMappingTests.docValuesType(doc, "double"));
|
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "double"));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDocValuesOnNested() throws Exception {
|
public void testDocValuesOnNested() throws Exception {
|
||||||
|
@ -326,8 +326,8 @@ public class SimpleNumericTests extends ElasticsearchSingleNodeTest {
|
||||||
if (doc == parsedDoc.rootDoc()) {
|
if (doc == parsedDoc.rootDoc()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
assertEquals(DocValuesType.BINARY, SimpleStringMappingTests.docValuesType(doc, "nested.int"));
|
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "nested.int"));
|
||||||
assertEquals(DocValuesType.BINARY, SimpleStringMappingTests.docValuesType(doc, "nested.double"));
|
assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "nested.double"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue