Change numeric data types to use SORTED_NUMERIC docvalues type

instead of a custom encoding in BINARY. In low-level benchmarks this is 2x to 5x faster; it's also optimized for the common case where fields contain at most one value per document. Additionally, SORTED_NUMERIC doesn't lose values if they appear more than once, so mathematical computations such as averages are correct. Closes #6967
2025-02-17 10:25:15 +00:00 · 2014-07-23 14:55:03 -04:00 · 2014-07-23 14:55:03 -04:00 · 66825ac851
commit 66825ac851
parent ff2903d2c6
15 changed files with 381 additions and 58 deletions
--- a/src/main/java/org/elasticsearch/index/fielddata/plain/DocValuesIndexFieldData.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/plain/DocValuesIndexFieldData.java
@ -22,6 +22,7 @@ package org.elasticsearch.index.fielddata.plain;
 import com.google.common.collect.ImmutableSet;
 import org.apache.lucene.index.IndexReader;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.common.settings.Settings;
@ -107,7 +108,12 @@ public abstract class DocValuesIndexFieldData {
                assert !numericType.isFloatingPoint();
                return new NumericDVIndexFieldData(index, fieldNames, mapper.fieldDataType());
            } else if (numericType != null) {
-                return new BinaryDVNumericIndexFieldData(index, fieldNames, numericType, mapper.fieldDataType());
+                if (Version.indexCreated(indexSettings).onOrAfter(Version.V_1_4_0)) {
+                    return new SortedNumericDVIndexFieldData(index, fieldNames, numericType, mapper.fieldDataType());
+                } else {
+                    // prior to ES 1.4: multi-valued numerics were boxed inside a byte[] as BINARY
+                    return new BinaryDVNumericIndexFieldData(index, fieldNames, numericType, mapper.fieldDataType());
+                }
            } else {
                return new SortedSetDVOrdinalsIndexFieldData(index, cache, indexSettings, fieldNames, breakerService, mapper.fieldDataType());
            }
--- a/src/main/java/org/elasticsearch/index/fielddata/plain/SortedNumericDVIndexFieldData.java
+++ b/src/main/java/org/elasticsearch/index/fielddata/plain/SortedNumericDVIndexFieldData.java
@ -0,0 +1,292 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.fielddata.plain;
+
+import com.google.common.base.Preconditions;
+import org.apache.lucene.index.*;
+import org.apache.lucene.util.NumericUtils;
+import org.elasticsearch.ElasticsearchIllegalStateException;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.fielddata.*;
+import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
+import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
+import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
+import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
+import org.elasticsearch.index.mapper.FieldMapper.Names;
+import org.elasticsearch.search.MultiValueMode;
+
+import java.io.IOException;
+
+/**
+ * FieldData backed by {@link AtomicReader#getSortedNumericDocValues(String)}
+ * @see FieldInfo.DocValuesType#SORTED_NUMERIC
+ */
+public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData implements IndexNumericFieldData {
+    private final NumericType numericType;
+    
+    public SortedNumericDVIndexFieldData(Index index, Names fieldNames, NumericType numericType, FieldDataType fieldDataType) {
+        super(index, fieldNames, fieldDataType);
+        Preconditions.checkArgument(numericType != null, "numericType must be non-null");
+        this.numericType = numericType;
+    }
+
+    @Override
+    public org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue, MultiValueMode sortMode, Nested nested) {
+        switch (numericType) {
+            case FLOAT:
+                return new FloatValuesComparatorSource(this, missingValue, sortMode, nested);
+            case DOUBLE: 
+                return new DoubleValuesComparatorSource(this, missingValue, sortMode, nested);
+            default:
+                assert !numericType.isFloatingPoint();
+                return new LongValuesComparatorSource(this, missingValue, sortMode, nested);
+        }
+    }
+
+    @Override
+    public NumericType getNumericType() {
+        return numericType;
+    }
+    
+    @Override
+    public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
+        return load(context);
+    }
+
+    @Override
+    public AtomicNumericFieldData load(AtomicReaderContext context) {
+        final AtomicReader reader = context.reader();
+        final String field = fieldNames.indexName();
+        
+        switch (numericType) {
+            case FLOAT:
+                return new SortedNumericFloatFieldData(reader, field);
+            case DOUBLE:
+                return new SortedNumericDoubleFieldData(reader, field);
+            default:
+                return new SortedNumericLongFieldData(reader, field);
+        } 
+    }
+    
+    /**
+     * FieldData implementation for integral types.
+     * <p>
+     * Order of values within a document is consistent with 
+     * {@link Long#compareTo(Long)}.
+     * <p>
+     * Although the API is multi-valued, most codecs in Lucene specialize 
+     * for the case where documents have at most one value. In this case
+     * {@link DocValues#unwrapSingleton(SortedNumericDocValues)} will return
+     * the underlying single-valued NumericDocValues representation, and 
+     * {@link DocValues#unwrapSingletonBits(SortedNumericDocValues)} will return
+     * a Bits matching documents that have a real value (as opposed to missing).
+     */
+    static final class SortedNumericLongFieldData extends AtomicLongFieldData {
+        final AtomicReader reader;
+        final String field;
+
+        SortedNumericLongFieldData(AtomicReader reader, String field) {
+            super(-1L);
+            this.reader = reader;
+            this.field = field;
+        }
+
+        @Override
+        public SortedNumericDocValues getLongValues() {
+            try {
+                return DocValues.getSortedNumeric(reader, field);
+            } catch (IOException e) {
+                throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
+            }
+        }
+    }
+    
+    /**
+     * FieldData implementation for 32-bit float values.
+     * <p>
+     * Order of values within a document is consistent with
+     * {@link Float#compareTo(Float)}, hence the following reversible
+     * transformation is applied at both index and search:
+     * <pre>{@code
+     *   bits ^ (bits >> 31) & 0x7fffffff
+     * }</pre>
+     * <p>
+     * Although the API is multi-valued, most codecs in Lucene specialize 
+     * for the case where documents have at most one value. In this case
+     * {@link FieldData#unwrapSingleton(SortedNumericDoubleValues)} will return
+     * the underlying single-valued NumericDoubleValues representation, and 
+     * {@link FieldData#unwrapSingletonBits(SortedNumericDoubleValues)} will return
+     * a Bits matching documents that have a real value (as opposed to missing).
+     */
+    static final class SortedNumericFloatFieldData extends AtomicDoubleFieldData {
+        final AtomicReader reader;
+        final String field;
+        
+        SortedNumericFloatFieldData(AtomicReader reader, String field) {
+            super(-1L);
+            this.reader = reader;
+            this.field = field;
+        }
+
+        @Override
+        public SortedNumericDoubleValues getDoubleValues() {
+            try {
+                SortedNumericDocValues raw = DocValues.getSortedNumeric(reader, field);
+                
+                NumericDocValues single = DocValues.unwrapSingleton(raw);
+                if (single != null) {
+                    return FieldData.singleton(new SingleFloatValues(single), DocValues.unwrapSingletonBits(raw));
+                } else {
+                    return new MultiFloatValues(raw);
+                }
+            } catch (IOException e) {
+                throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
+            }
+        }
+    }
+    
+    /** 
+     * Wraps a NumericDocValues and exposes a single 32-bit float per document.
+     */
+    static final class SingleFloatValues extends NumericDoubleValues {
+        final NumericDocValues in;
+        
+        SingleFloatValues(NumericDocValues in) {
+            this.in = in;
+        }
+
+        @Override
+        public double get(int docID) {
+            return NumericUtils.sortableIntToFloat((int) in.get(docID));
+        }
+    }
+    
+    /** 
+     * Wraps a SortedNumericDocValues and exposes multiple 32-bit floats per document.
+     */
+    static final class MultiFloatValues extends SortedNumericDoubleValues {
+        final SortedNumericDocValues in;
+        
+        MultiFloatValues(SortedNumericDocValues in) {
+            this.in = in;
+        }
+        
+        @Override
+        public void setDocument(int doc) {
+            in.setDocument(doc);
+        }
+
+        @Override
+        public double valueAt(int index) {
+            return NumericUtils.sortableIntToFloat((int) in.valueAt(index));
+        }
+
+        @Override
+        public int count() {
+            return in.count();
+        }
+    }
+    
+    /**
+     * FieldData implementation for 64-bit double values.
+     * <p>
+     * Order of values within a document is consistent with
+     * {@link Double#compareTo(Double)}, hence the following reversible
+     * transformation is applied at both index and search:
+     * <pre>{@code
+     *   bits ^ (bits >> 63) & 0x7fffffffffffffffL
+     * }</pre>
+     * <p>
+     * Although the API is multi-valued, most codecs in Lucene specialize 
+     * for the case where documents have at most one value. In this case
+     * {@link FieldData#unwrapSingleton(SortedNumericDoubleValues)} will return
+     * the underlying single-valued NumericDoubleValues representation, and 
+     * {@link FieldData#unwrapSingletonBits(SortedNumericDoubleValues)} will return
+     * a Bits matching documents that have a real value (as opposed to missing).
+     */
+    static final class SortedNumericDoubleFieldData extends AtomicDoubleFieldData {
+        final AtomicReader reader;
+        final String field;
+        
+        SortedNumericDoubleFieldData(AtomicReader reader, String field) {
+            super(-1L);
+            this.reader = reader;
+            this.field = field;
+        }
+
+        @Override
+        public SortedNumericDoubleValues getDoubleValues() {
+            try {
+                SortedNumericDocValues raw = DocValues.getSortedNumeric(reader, field);
+                
+                NumericDocValues single = DocValues.unwrapSingleton(raw);
+                if (single != null) {
+                    return FieldData.singleton(new SingleDoubleValues(single), DocValues.unwrapSingletonBits(raw));
+                } else {
+                    return new MultiDoubleValues(raw);
+                }
+            } catch (IOException e) {
+                throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
+            }
+        }
+    }
+    
+    /** 
+     * Wraps a NumericDocValues and exposes a single 64-bit double per document.
+     */
+    static final class SingleDoubleValues extends NumericDoubleValues {
+        final NumericDocValues in;
+        
+        SingleDoubleValues(NumericDocValues in) {
+            this.in = in;
+        }
+
+        @Override
+        public double get(int docID) {
+            return NumericUtils.sortableLongToDouble(in.get(docID));
+        }
+    }
+    
+    /** 
+     * Wraps a SortedNumericDocValues and exposes multiple 64-bit doubles per document.
+     */
+    static final class MultiDoubleValues extends SortedNumericDoubleValues {
+        final SortedNumericDocValues in;
+        
+        MultiDoubleValues(SortedNumericDocValues in) {
+            this.in = in;
+        }
+
+        @Override
+        public void setDocument(int doc) {
+            in.setDocument(doc);
+        }
+
+        @Override
+        public double valueAt(int index) {
+            return NumericUtils.sortableLongToDouble(in.valueAt(index));
+        }
+
+        @Override
+        public int count() {
+            return in.count();
+        }
+    }
+}
--- a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java
+++ b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java
@ -320,7 +320,7 @@ public class ByteFieldMapper extends NumberFieldMapper<Byte> {
            fields.add(field);
        }
        if (hasDocValues()) {
-            addDocValue(context, value);
+            addDocValue(context, fields, value);
        }
    }

--- a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java
+++ b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java
@ -41,7 +41,6 @@ import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.common.util.LocaleUtils;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.analysis.NumericDateAnalyzer;
 import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
 import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
@ -514,7 +513,7 @@ public class DateFieldMapper extends NumberFieldMapper<Long> {
                fields.add(field);
            }
            if (hasDocValues()) {
-                addDocValue(context, value);
+                addDocValue(context, fields, value);
            }
        }
    }
--- a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java
+++ b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java
@ -316,12 +316,16 @@ public class DoubleFieldMapper extends NumberFieldMapper<Double> {
            fields.add(field);
        }
        if (hasDocValues()) {
-            CustomDoubleNumericDocValuesField field = (CustomDoubleNumericDocValuesField) context.doc().getByKey(names().indexName());
-            if (field != null) {
-                field.add(value);
+            if (useSortedNumericDocValues) {
+                addDocValue(context, fields, NumericUtils.doubleToSortableLong(value));
            } else {
-                field = new CustomDoubleNumericDocValuesField(names().indexName(), value);
-                context.doc().addWithKey(names().indexName(), field);
+                CustomDoubleNumericDocValuesField field = (CustomDoubleNumericDocValuesField) context.doc().getByKey(names().indexName());
+                if (field != null) {
+                    field.add(value);
+                } else {
+                    field = new CustomDoubleNumericDocValuesField(names().indexName(), value);
+                    context.doc().addWithKey(names().indexName(), field);
+                }
            }
        }
    }
--- a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java
+++ b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java
@ -321,12 +321,16 @@ public class FloatFieldMapper extends NumberFieldMapper<Float> {
            fields.add(field);
        }
        if (hasDocValues()) {
-            CustomFloatNumericDocValuesField field = (CustomFloatNumericDocValuesField) context.doc().getByKey(names().indexName());
-            if (field != null) {
-                field.add(value);
+            if (useSortedNumericDocValues) {
+                addDocValue(context, fields, NumericUtils.floatToSortableInt(value));
            } else {
-                field = new CustomFloatNumericDocValuesField(names().indexName(), value);
-                context.doc().addWithKey(names().indexName(), field);
+                CustomFloatNumericDocValuesField field = (CustomFloatNumericDocValuesField) context.doc().getByKey(names().indexName());
+                if (field != null) {
+                    field.add(value);
+                } else {
+                    field = new CustomFloatNumericDocValuesField(names().indexName(), value);
+                    context.doc().addWithKey(names().indexName(), field);
+                }
            }
        }
    }
--- a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java
+++ b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java
@ -319,7 +319,7 @@ public class IntegerFieldMapper extends NumberFieldMapper<Integer> {
            fields.add(field);
        }
        if (hasDocValues()) {
-            addDocValue(context, value);
+            addDocValue(context, fields, value);
        }
    }

--- a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java
+++ b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java
@ -305,7 +305,7 @@ public class LongFieldMapper extends NumberFieldMapper<Long> {
            fields.add(field);
        }
        if (hasDocValues()) {
-            addDocValue(context, value);
+            addDocValue(context, fields, value);
        }
    }

--- a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java
+++ b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java
@ -27,6 +27,7 @@ import org.apache.lucene.analysis.NumericTokenStream;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexableField;
@ -35,6 +36,7 @@ import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.Explicit;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.settings.Settings;
@ -138,6 +140,14 @@ public abstract class NumberFieldMapper<T extends Number> extends AbstractFieldM

    protected Explicit<Boolean> coerce;
    
+    /** 
+     * True if index version is 1.4+
+     * <p>
+     * In this case numerics are encoded with SORTED_NUMERIC docvalues,
+     * otherwise for older indexes we must continue to write BINARY (for now)
+     */
+    protected final boolean useSortedNumericDocValues;
+    
    private ThreadLocal<NumericTokenStream> tokenStream = new ThreadLocal<NumericTokenStream>() {
        @Override
        protected NumericTokenStream initialValue() {
@ -189,6 +199,8 @@ public abstract class NumberFieldMapper<T extends Number> extends AbstractFieldM
        }
        this.ignoreMalformed = ignoreMalformed;
        this.coerce = coerce;
+        Version v = indexSettings == null ? Version.CURRENT : Version.indexCreated(indexSettings);
+        this.useSortedNumericDocValues = v.onOrAfter(Version.V_1_4_0);
    }

    @Override
@ -234,13 +246,17 @@ public abstract class NumberFieldMapper<T extends Number> extends AbstractFieldM

    protected abstract void innerParseCreateField(ParseContext context, List<Field> fields) throws IOException;

-    protected final void addDocValue(ParseContext context, long value) {
-        CustomLongNumericDocValuesField field = (CustomLongNumericDocValuesField) context.doc().getByKey(names().indexName());
-        if (field != null) {
-            field.add(value);
+    protected final void addDocValue(ParseContext context, List<Field> fields, long value) {
+        if (useSortedNumericDocValues) {
+            fields.add(new SortedNumericDocValuesField(names().indexName(), value));
        } else {
-            field = new CustomLongNumericDocValuesField(names().indexName(), value);
-            context.doc().addWithKey(names().indexName(), field);
+            CustomLongNumericDocValuesField field = (CustomLongNumericDocValuesField) context.doc().getByKey(names().indexName());
+            if (field != null) {
+                field.add(value);
+            } else {
+                field = new CustomLongNumericDocValuesField(names().indexName(), value);
+                context.doc().addWithKey(names().indexName(), field);
+            }
        }
    }

--- a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java
+++ b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java
@ -321,7 +321,7 @@ public class ShortFieldMapper extends NumberFieldMapper<Short> {
            fields.add(field);
        }
        if (hasDocValues()) {
-            addDocValue(context, value);
+            addDocValue(context, fields, value);
        }
    }

--- a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java
+++ b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java
@ -298,7 +298,7 @@ public class IpFieldMapper extends NumberFieldMapper<Long> {
            fields.add(field);
        }
        if (hasDocValues()) {
-            addDocValue(context, value);
+            addDocValue(context, fields, value);
        }
    }

--- a/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataTests.java
+++ b/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataTests.java
@ -22,6 +22,7 @@ package org.elasticsearch.index.fielddata;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.index.*;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
@ -34,6 +35,8 @@ import org.junit.After;
 import org.junit.Before;

 // we might wanna cut this over to LuceneTestCase
+@SuppressCodecs({"Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45", "Lucene46"}) 
+// avoid codecs that do not support SortedNumerics, SortedSet, etc
 public abstract class AbstractFieldDataTests extends ElasticsearchSingleNodeTest {

    protected IndexService indexService;
--- a/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java
+++ b/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java
@ -101,9 +101,7 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
        typeMap.put(new FieldDataType("long", ImmutableSettings.builder().put("format", "doc_values")), Type.Long);
        typeMap.put(new FieldDataType("double", ImmutableSettings.builder().put("format", "doc_values")), Type.Double);
        typeMap.put(new FieldDataType("float", ImmutableSettings.builder().put("format", "doc_values")), Type.Float);
-        if (LuceneTestCase.defaultCodecSupportsSortedSet()) {
-            typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes);
-        }
+        typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes);
        ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<>(typeMap.entrySet());
        Preprocessor pre = new ToDoublePreprocessor();
        while (!list.isEmpty()) {
@ -149,13 +147,17 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
        final int maxNumValues = randomBoolean() ? 1 : randomIntBetween(2, 40);
        byte[] values = new byte[maxNumValues];
        for (int i = 0; i < atLeast; i++) {
-            final int numValues = randomInt(maxNumValues);
+            int numValues = randomInt(maxNumValues);
+            // fielddata (FD) loses values if they are duplicated, so we must deduplicate for this test
+            Set<Byte> vals = new HashSet<Byte>();
            for (int j = 0; j < numValues; ++j) {
-                if (randomBoolean()) {
-                    values[j] = 1; // test deduplication
-                } else {
-                    values[j] = randomByte();
-                }
+                vals.add(randomByte());
+            }
+            
+            numValues = vals.size();
+            int upto = 0;
+            for (Byte bb : vals) {
+                values[upto++] = bb.byteValue();
            }

            XContentBuilder doc = XContentFactory.jsonBuilder().startObject();
@ -227,15 +229,22 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
        final int maxNumValues = randomBoolean() ? 1 : randomIntBetween(2, 40);
        float[] values = new float[maxNumValues];
        for (int i = 0; i < atLeast; i++) {
-            final int numValues = randomInt(maxNumValues);
+            int numValues = randomInt(maxNumValues);
            float def = randomBoolean() ? randomFloat() : Float.NaN;
+            // fielddata (FD) loses values if they are duplicated, so we must deduplicate for this test
+            Set<Float> vals = new HashSet<Float>();
            for (int j = 0; j < numValues; ++j) {
                if (randomBoolean()) {
-                    values[j] = def;
+                    vals.add(def);
                } else {
-                    values[j] = randomFloat();
+                    vals.add(randomFloat());
                }
            }
+            numValues = vals.size();
+            int upto = 0;
+            for (Float f : vals) {
+                values[upto++] = f.floatValue();
+            }

            XContentBuilder doc = XContentFactory.jsonBuilder().startObject().startArray("float");
            for (int j = 0; j < numValues; ++j) {
@ -302,15 +311,11 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
                for (int j : numbers) {
                    final String s = English.longToEnglish(j);
                    d.add(new StringField("bytes", s, Field.Store.NO));
-                    if (LuceneTestCase.defaultCodecSupportsSortedSet()) {
-                        d.add(new SortedSetDocValuesField("bytes", new BytesRef(s)));
-                    }
+                    d.add(new SortedSetDocValuesField("bytes", new BytesRef(s)));
                }
                if (random.nextInt(10) == 0) {
                    d.add(new StringField("bytes", "", Field.Store.NO));
-                    if (LuceneTestCase.defaultCodecSupportsSortedSet()) {
-                        d.add(new SortedSetDocValuesField("bytes", new BytesRef()));
-                    }
+                    d.add(new SortedSetDocValuesField("bytes", new BytesRef()));
                }
            }
            writer.addDocument(d);
@ -322,9 +327,7 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
        Map<FieldDataType, Type> typeMap = new HashMap<>();
        typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")), Type.Bytes);
        typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")), Type.Bytes);
-        if (LuceneTestCase.defaultCodecSupportsSortedSet()) {
-            typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes);
-        }
+        typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes);
        // TODO add filters
        ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<>(typeMap.entrySet());
        Preprocessor pre = new Preprocessor();
@ -371,9 +374,7 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
            for (int j = 0; j < numVals; ++j) {
                final String value = RandomPicks.randomFrom(random, Arrays.asList(values));
                d.add(new StringField("string", value, Field.Store.NO));
-                if (LuceneTestCase.defaultCodecSupportsSortedSet()) {
-                    d.add(new SortedSetDocValuesField("bytes", new BytesRef(value)));
-                }
+                d.add(new SortedSetDocValuesField("bytes", new BytesRef(value)));
            }
            writer.addDocument(d);
            if (randomInt(10) == 0) {
@ -385,9 +386,7 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
        Map<FieldDataType, Type> typeMap = new HashMap<FieldDataType, DuelFieldDataTests.Type>();
        typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "fst")), Type.Bytes);
        typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "paged_bytes")), Type.Bytes);
-        if (LuceneTestCase.defaultCodecSupportsSortedSet()) {
-            typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes);
-        }
+        typeMap.put(new FieldDataType("string", ImmutableSettings.builder().put("format", "doc_values")), Type.Bytes);

        for (Map.Entry<FieldDataType, Type> entry : typeMap.entrySet()) {
            ifdService.clear();
--- a/src/test/java/org/elasticsearch/index/fielddata/IndexFieldDataServiceTests.java
+++ b/src/test/java/org/elasticsearch/index/fielddata/IndexFieldDataServiceTests.java
@ -68,7 +68,7 @@ public class IndexFieldDataServiceTests extends ElasticsearchSingleNodeTest {
                ifdService.clear();
                fd = ifdService.getForField(mapper);
                if (docValues) {
-                    assertTrue(fd instanceof BinaryDVNumericIndexFieldData);
+                    assertTrue(fd instanceof SortedNumericDVIndexFieldData);
                } else {
                    assertTrue(fd instanceof PackedArrayIndexFieldData);
                }
@ -78,7 +78,7 @@ public class IndexFieldDataServiceTests extends ElasticsearchSingleNodeTest {
            ifdService.clear();
            fd = ifdService.getForField(floatMapper);
            if (docValues) {
-                assertTrue(fd instanceof BinaryDVNumericIndexFieldData);
+                assertTrue(fd instanceof SortedNumericDVIndexFieldData);
            } else {
                assertTrue(fd instanceof FloatArrayIndexFieldData);
            }
@ -87,7 +87,7 @@ public class IndexFieldDataServiceTests extends ElasticsearchSingleNodeTest {
            ifdService.clear();
            fd = ifdService.getForField(doubleMapper);
            if (docValues) {
-                assertTrue(fd instanceof BinaryDVNumericIndexFieldData);
+                assertTrue(fd instanceof SortedNumericDVIndexFieldData);
            } else {
                assertTrue(fd instanceof DoubleArrayIndexFieldData);
            }
--- a/src/test/java/org/elasticsearch/index/mapper/numeric/SimpleNumericTests.java
+++ b/src/test/java/org/elasticsearch/index/mapper/numeric/SimpleNumericTests.java
@ -279,8 +279,8 @@ public class SimpleNumericTests extends ElasticsearchSingleNodeTest {
                .endObject()
                .bytes());
        final Document doc = parsedDoc.rootDoc();
-        assertEquals(DocValuesType.BINARY, SimpleStringMappingTests.docValuesType(doc, "int"));
-        assertEquals(DocValuesType.BINARY, SimpleStringMappingTests.docValuesType(doc, "double"));
+        assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "int"));
+        assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "double"));
    }

    public void testDocValuesOnNested() throws Exception {
@ -326,8 +326,8 @@ public class SimpleNumericTests extends ElasticsearchSingleNodeTest {
            if (doc == parsedDoc.rootDoc()) {
                continue;
            }
-            assertEquals(DocValuesType.BINARY, SimpleStringMappingTests.docValuesType(doc, "nested.int"));
-            assertEquals(DocValuesType.BINARY, SimpleStringMappingTests.docValuesType(doc, "nested.double"));
+            assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "nested.int"));
+            assertEquals(DocValuesType.SORTED_NUMERIC, SimpleStringMappingTests.docValuesType(doc, "nested.double"));
        }
    }