mirror of
synced 2025-03-25 01:19:02 +00:00
Remove non-default fielddata formats.
Now that doc values are the default for fielddata, specialized in-memory formats are becoming an esoteric option. This commit removes two such formats: `fst` on string fields and `compressed` on geo points. I also removed the documentation and tests asserting that the fielddata cache is shared when you change the format, since this is only true for in-memory fielddata formats (for doc values, the caching is done directly in Lucene).
This commit is contained in:
@ -31,7 +31,6 @@ import org.elasticsearch.common.util.concurrent.KeyedLock;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.plain.*;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.core.BooleanFieldMapper;
import org.elasticsearch.index.mapper.internal.IndexFieldMapper;
@ -60,8 +59,6 @@ public class IndexFieldDataService extends AbstractIndexComponent {
private static final String DOC_VALUES_FORMAT = "doc_values";
private static final String ARRAY_FORMAT = "array";
private static final String PAGED_BYTES_FORMAT = "paged_bytes";
private static final String FST_FORMAT = "fst";
private static final String COMPRESSED_FORMAT = "compressed";
private final static ImmutableMap<String, IndexFieldData.Builder> buildersByType;
private final static ImmutableMap<String, IndexFieldData.Builder> docValuesBuildersByType;
@ -99,7 +96,6 @@ public class IndexFieldDataService extends AbstractIndexComponent {
buildersByTypeAndFormat = MapBuilder.<Tuple<String, String>, IndexFieldData.Builder>newMapBuilder()
.put(Tuple.tuple("string", PAGED_BYTES_FORMAT), new PagedBytesIndexFieldData.Builder())
.put(Tuple.tuple("string", FST_FORMAT), new FSTBytesIndexFieldData.Builder())
.put(Tuple.tuple("string", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder())
.put(Tuple.tuple("string", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
@ -130,7 +126,6 @@ public class IndexFieldDataService extends AbstractIndexComponent {
.put(Tuple.tuple("geo_point", ARRAY_FORMAT), new GeoPointDoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("geo_point", DOC_VALUES_FORMAT), new GeoPointBinaryDVIndexFieldData.Builder())
.put(Tuple.tuple("geo_point", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
.put(Tuple.tuple("geo_point", COMPRESSED_FORMAT), new GeoPointCompressedIndexFieldData.Builder())
.put(Tuple.tuple("binary", DOC_VALUES_FORMAT), new BytesBinaryDVIndexFieldData.Builder())
.put(Tuple.tuple("binary", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
@ -24,7 +24,6 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MappedFieldType.Names;
import org.elasticsearch.index.mapper.MapperService;
@ -1,121 +0,0 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.Util;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
public class FSTBytesAtomicFieldData extends AbstractAtomicOrdinalsFieldData {
// 0 ordinal in values means no value (it's null)
protected final Ordinals ordinals;
private long size = -1;
private final FST<Long> fst;
public FSTBytesAtomicFieldData(FST<Long> fst, Ordinals ordinals) {
this.ordinals = ordinals;
this.fst = fst;
public void close() {
public long ramBytesUsed() {
if (size == -1) {
long size = ordinals.ramBytesUsed();
// FST
size += fst == null ? 0 : fst.ramBytesUsed();
this.size = size;
return size;
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
resources.add(Accountables.namedAccountable("ordinals", ordinals));
if (fst != null) {
resources.add(Accountables.namedAccountable("terms", fst));
return Collections.unmodifiableList(resources);
public RandomAccessOrds getOrdinalsValues() {
return ordinals.ordinals(new ValuesHolder(fst));
private static class ValuesHolder implements Ordinals.ValuesHolder {
private final FST<Long> fst;
// per-thread resources
private final BytesRefBuilder scratch;
protected final BytesReader in;
protected final Arc<Long> firstArc = new Arc<>();
protected final Arc<Long> scratchArc = new Arc<>();
protected final IntsRefBuilder scratchInts = new IntsRefBuilder();
ValuesHolder(FST<Long> fst) {
this.fst = fst;
scratch = new BytesRefBuilder();
in = fst.getBytesReader();
public BytesRef lookupOrd(long ord) {
assert ord != SortedSetDocValues.NO_MORE_ORDS;
try {
IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts);
Util.toBytesRef(output, scratch);
} catch (IOException ex) {
return scratch.get();
@ -1,116 +0,0 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.*;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
public class FSTBytesIndexFieldData extends AbstractIndexOrdinalsFieldData {
private final CircuitBreakerService breakerService;
public static class Builder implements IndexFieldData.Builder {
public IndexOrdinalsFieldData build(Index index, @IndexSettings Settings indexSettings, MappedFieldType fieldType,
IndexFieldDataCache cache, CircuitBreakerService breakerService, MapperService mapperService) {
return new FSTBytesIndexFieldData(index, indexSettings, fieldType.names(), fieldType.fieldDataType(), cache, breakerService);
FSTBytesIndexFieldData(Index index, @IndexSettings Settings indexSettings, MappedFieldType.Names fieldNames, FieldDataType fieldDataType,
IndexFieldDataCache cache, CircuitBreakerService breakerService) {
super(index, indexSettings, fieldNames, fieldDataType, cache, breakerService);
this.breakerService = breakerService;
public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception {
LeafReader reader = context.reader();
Terms terms = reader.terms(getFieldNames().indexName());
AtomicOrdinalsFieldData data = null;
// TODO: Use an actual estimator to estimate before loading.
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker(CircuitBreaker.FIELDDATA));
if (terms == null) {
data = AbstractAtomicOrdinalsFieldData.empty();
estimator.afterLoad(null, data.ramBytesUsed());
return data;
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<>(INPUT_TYPE.BYTE1, outputs);
final IntsRefBuilder scratch = new IntsRefBuilder();
final long numTerms;
if (regex == null && frequency == null) {
numTerms = terms.size();
} else {
numTerms = -1;
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
boolean success = false;
try (OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio)) {
// we don't store an ord 0 in the FST since we could have an empty string in there and FSTs don't support
// empty strings twice, i.e. the merge fails for long output.
TermsEnum termsEnum = filter(terms, reader);
PostingsEnum docsEnum = null;
for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
final long termOrd = builder.nextOrdinal();
fstBuilder.add(Util.toIntsRef(term, scratch), (long) termOrd);
docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.NONE);
for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
FST<Long> fst = fstBuilder.finish();
final Ordinals ordinals = builder.build(fieldDataType.getSettings());
data = new FSTBytesAtomicFieldData(fst, ordinals);
success = true;
return data;
} finally {
if (success) {
estimator.afterLoad(null, data.ramBytesUsed());
@ -1,169 +0,0 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PagedMutable;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.index.fielddata.GeoPointValues;
import org.elasticsearch.index.fielddata.MultiGeoPointValues;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
* Field data atomic impl for geo points with lossy compression.
public abstract class GeoPointCompressedAtomicFieldData extends AbstractAtomicGeoPointFieldData {
public void close() {
static class WithOrdinals extends GeoPointCompressedAtomicFieldData {
private final GeoPointFieldMapper.Encoding encoding;
private final PagedMutable lon, lat;
private final Ordinals ordinals;
private final int maxDoc;
public WithOrdinals(GeoPointFieldMapper.Encoding encoding, PagedMutable lon, PagedMutable lat, Ordinals ordinals, int maxDoc) {
this.encoding = encoding;
this.lon = lon;
this.lat = lat;
this.ordinals = ordinals;
this.maxDoc = maxDoc;
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.ramBytesUsed() + lat.ramBytesUsed();
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
resources.add(Accountables.namedAccountable("latitude", lat));
resources.add(Accountables.namedAccountable("longitude", lon));
return Collections.unmodifiableList(resources);
public MultiGeoPointValues getGeoPointValues() {
final RandomAccessOrds ords = ordinals.ordinals();
final SortedDocValues singleOrds = DocValues.unwrapSingleton(ords);
if (singleOrds != null) {
final GeoPoint point = new GeoPoint();
final GeoPointValues values = new GeoPointValues() {
public GeoPoint get(int docID) {
final int ord = singleOrds.getOrd(docID);
if (ord >= 0) {
encoding.decode(lat.get(ord), lon.get(ord), point);
} else {
point.reset(0, 0);
return point;
return FieldData.singleton(values, DocValues.docsWithValue(singleOrds, maxDoc));
} else {
final GeoPoint point = new GeoPoint();
return new MultiGeoPointValues() {
public GeoPoint valueAt(int index) {
final long ord = ords.ordAt(index);
encoding.decode(lat.get(ord), lon.get(ord), point);
return point;
public void setDocument(int docId) {
public int count() {
return ords.cardinality();
* Assumes unset values are marked in bitset, and docId is used as the index to the value array.
public static class Single extends GeoPointCompressedAtomicFieldData {
private final GeoPointFieldMapper.Encoding encoding;
private final PagedMutable lon, lat;
private final BitSet set;
public Single(GeoPointFieldMapper.Encoding encoding, PagedMutable lon, PagedMutable lat, BitSet set) {
this.encoding = encoding;
this.lon = lon;
this.lat = lat;
this.set = set;
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.ramBytesUsed() + lat.ramBytesUsed() + (set == null ? 0 : set.ramBytesUsed());
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
resources.add(Accountables.namedAccountable("latitude", lat));
resources.add(Accountables.namedAccountable("longitude", lon));
if (set != null) {
resources.add(Accountables.namedAccountable("missing bitset", set));
return Collections.unmodifiableList(resources);
public MultiGeoPointValues getGeoPointValues() {
final GeoPoint point = new GeoPoint();
final GeoPointValues values = new GeoPointValues() {
public GeoPoint get(int docID) {
encoding.decode(lat.get(docID), lon.get(docID), point);
return point;
return FieldData.singleton(values, set);
@ -1,157 +0,0 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PagedMutable;
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.DistanceUnit;
import org.elasticsearch.common.unit.DistanceUnit.Distance;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
public class GeoPointCompressedIndexFieldData extends AbstractIndexGeoPointFieldData {
private static final String PRECISION_KEY = "precision";
private static final Distance DEFAULT_PRECISION_VALUE = new Distance(1, DistanceUnit.CENTIMETERS);
private final CircuitBreakerService breakerService;
public static class Builder implements IndexFieldData.Builder {
public IndexFieldData<?> build(Index index, @IndexSettings Settings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache,
CircuitBreakerService breakerService, MapperService mapperService) {
FieldDataType type = fieldType.fieldDataType();
final String precisionAsString = type.getSettings().get(PRECISION_KEY);
final Distance precision;
if (precisionAsString != null) {
precision = Distance.parseDistance(precisionAsString);
} else {
return new GeoPointCompressedIndexFieldData(index, indexSettings, fieldType.names(), fieldType.fieldDataType(), cache, precision, breakerService);
private final GeoPointFieldMapper.Encoding encoding;
public GeoPointCompressedIndexFieldData(Index index, @IndexSettings Settings indexSettings, MappedFieldType.Names fieldNames,
FieldDataType fieldDataType, IndexFieldDataCache cache, Distance precision,
CircuitBreakerService breakerService) {
super(index, indexSettings, fieldNames, fieldDataType, cache);
this.encoding = GeoPointFieldMapper.Encoding.of(precision);
this.breakerService = breakerService;
public AtomicGeoPointFieldData loadDirect(LeafReaderContext context) throws Exception {
LeafReader reader = context.reader();
Terms terms = reader.terms(getFieldNames().indexName());
AtomicGeoPointFieldData data = null;
// TODO: Use an actual estimator to estimate before loading.
NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker(CircuitBreaker.FIELDDATA));
if (terms == null) {
data = AbstractAtomicGeoPointFieldData.empty(reader.maxDoc());
estimator.afterLoad(null, data.ramBytesUsed());
return data;
final long initialSize;
if (terms.size() >= 0) {
initialSize = 1 + terms.size();
} else { // codec doesn't expose size
initialSize = 1 + Math.min(1 << 12, reader.maxDoc());
final int pageSize = Integer.highestOneBit(BigArrays.PAGE_SIZE_IN_BYTES * 8 / encoding.numBitsPerCoordinate() - 1) << 1;
PagedMutable lat = new PagedMutable(initialSize, pageSize, encoding.numBitsPerCoordinate(), PackedInts.COMPACT);
PagedMutable lon = new PagedMutable(initialSize, pageSize, encoding.numBitsPerCoordinate(), PackedInts.COMPACT);
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
boolean success = false;
try (OrdinalsBuilder builder = new OrdinalsBuilder(terms.size(), reader.maxDoc(), acceptableTransientOverheadRatio)) {
final GeoPointEnum iter = new GeoPointEnum(builder.buildFromTerms(terms.iterator()));
GeoPoint point;
while ((point = iter.next()) != null) {
final long ord = builder.currentOrdinal();
if (lat.size() <= ord) {
final long newSize = BigArrays.overSize(ord + 1);
lat = lat.resize(newSize);
lon = lon.resize(newSize);
lat.set(ord, encoding.encodeCoordinate(point.getLat()));
lon.set(ord, encoding.encodeCoordinate(point.getLon()));
Ordinals build = builder.build(fieldDataType.getSettings());
RandomAccessOrds ordinals = build.ordinals();
if (FieldData.isMultiValued(ordinals) || CommonSettings.getMemoryStorageHint(fieldDataType) == CommonSettings.MemoryStorageFormat.ORDINALS) {
if (lat.size() != ordinals.getValueCount()) {
lat = lat.resize(ordinals.getValueCount());
lon = lon.resize(ordinals.getValueCount());
data = new GeoPointCompressedAtomicFieldData.WithOrdinals(encoding, lon, lat, build, reader.maxDoc());
} else {
int maxDoc = reader.maxDoc();
PagedMutable sLat = new PagedMutable(reader.maxDoc(), pageSize, encoding.numBitsPerCoordinate(), PackedInts.COMPACT);
PagedMutable sLon = new PagedMutable(reader.maxDoc(), pageSize, encoding.numBitsPerCoordinate(), PackedInts.COMPACT);
final long missing = encoding.encodeCoordinate(0);
for (int i = 0; i < maxDoc; i++) {
final long nativeOrdinal = ordinals.nextOrd();
if (nativeOrdinal >= 0) {
sLat.set(i, lat.get(nativeOrdinal));
sLon.set(i, lon.get(nativeOrdinal));
} else {
sLat.set(i, missing);
sLon.set(i, missing);
BitSet set = builder.buildDocsWithValuesSet();
data = new GeoPointCompressedAtomicFieldData.Single(encoding, sLon, sLat, set);
success = true;
return data;
} finally {
if (success) {
estimator.afterLoad(null, data.ramBytesUsed());
@ -87,7 +87,6 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
LeafReaderContext context = refreshReader();
Map<FieldDataType, Type> typeMap = new HashMap<>();
typeMap.put(new FieldDataType("string", Settings.builder().put("format", "fst")), Type.Bytes);
typeMap.put(new FieldDataType("string", Settings.builder().put("format", "paged_bytes")), Type.Bytes);
typeMap.put(new FieldDataType("byte", Settings.builder().put("format", "array")), Type.Integer);
typeMap.put(new FieldDataType("short", Settings.builder().put("format", "array")), Type.Integer);
@ -325,7 +324,6 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
LeafReaderContext context = refreshReader();
Map<FieldDataType, Type> typeMap = new HashMap<>();
typeMap.put(new FieldDataType("string", Settings.builder().put("format", "fst")), Type.Bytes);
typeMap.put(new FieldDataType("string", Settings.builder().put("format", "paged_bytes")), Type.Bytes);
typeMap.put(new FieldDataType("string", Settings.builder().put("format", "doc_values")), Type.Bytes);
// TODO add filters
@ -384,7 +382,6 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
Map<FieldDataType, Type> typeMap = new HashMap<FieldDataType, DuelFieldDataTests.Type>();
typeMap.put(new FieldDataType("string", Settings.builder().put("format", "fst")), Type.Bytes);
typeMap.put(new FieldDataType("string", Settings.builder().put("format", "paged_bytes")), Type.Bytes);
typeMap.put(new FieldDataType("string", Settings.builder().put("format", "doc_values")), Type.Bytes);
@ -437,7 +434,6 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
Map<FieldDataType, Type> typeMap = new HashMap<>();
final Distance precision = new Distance(1, randomFrom(DistanceUnit.values()));
typeMap.put(new FieldDataType("geo_point", Settings.builder().put("format", "array")), Type.GeoPoint);
typeMap.put(new FieldDataType("geo_point", Settings.builder().put("format", "compressed").put("precision", precision)), Type.GeoPoint);
typeMap.put(new FieldDataType("geo_point", Settings.builder().put("format", "doc_values")), Type.GeoPoint);
ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<>(typeMap.entrySet());
@ -1,33 +0,0 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.index.fielddata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
public class FSTPackedBytesStringFieldDataTests extends AbstractStringFieldDataTests {
protected FieldDataType getFieldDataType() {
return new FieldDataType("string", Settings.builder().put("format", "fst").put(OrdinalsBuilder.FORCE_MULTI_ORDINALS, randomBoolean()));
@ -60,7 +60,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
writer.forceMerge(1, true);
LeafReaderContext context = refreshReader();
String[] formats = new String[] { "fst", "paged_bytes"};
String[] formats = new String[] { "paged_bytes"};
for (String format : formats) {
@ -153,7 +153,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
logger.debug(hundred + " " + ten + " " + five);
writer.forceMerge(1, true);
LeafReaderContext context = refreshReader();
String[] formats = new String[] { "fst", "paged_bytes"};
String[] formats = new String[] { "paged_bytes"};
for (String format : formats) {
@ -28,7 +28,6 @@ import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.fielddata.plain.*;
import org.elasticsearch.index.mapper.ContentPath;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper.BuilderContext;
import org.elasticsearch.index.mapper.MapperBuilders;
@ -101,10 +100,10 @@ public class IndexFieldDataServiceTests extends ElasticsearchSingleNodeTest {
final IndexService indexService = createIndex("test");
final IndexFieldDataService ifdService = indexService.fieldData();
final BuilderContext ctx = new BuilderContext(indexService.settingsService().getSettings(), new ContentPath(1));
final MappedFieldType stringMapper = MapperBuilders.stringField("string").tokenized(false).fieldDataSettings(DOC_VALUES_SETTINGS).fieldDataSettings(Settings.builder().put("format", "fst").build()).build(ctx).fieldType();
final MappedFieldType stringMapper = MapperBuilders.stringField("string").tokenized(false).fieldDataSettings(DOC_VALUES_SETTINGS).fieldDataSettings(Settings.builder().put("format", "disabled").build()).build(ctx).fieldType();
IndexFieldData<?> fd = ifdService.getForField(stringMapper);
assertTrue(fd instanceof FSTBytesIndexFieldData);
assertTrue(fd instanceof DisabledIndexFieldData);
final Settings fdSettings = Settings.builder().put("format", "array").build();
for (MappedFieldType mapper : Arrays.asList(
@ -133,7 +132,7 @@ public class IndexFieldDataServiceTests extends ElasticsearchSingleNodeTest {
final IndexService indexService = createIndex("test");
final IndexFieldDataService ifdService = indexService.fieldData();
final BuilderContext ctx = new BuilderContext(indexService.settingsService().getSettings(), new ContentPath(1));
final MappedFieldType mapper1 = MapperBuilders.stringField("s").tokenized(false).fieldDataSettings(Settings.builder().put(FieldDataType.FORMAT_KEY, "paged_bytes").build()).build(ctx).fieldType();
final MappedFieldType mapper1 = MapperBuilders.stringField("s").tokenized(false).docValues(true).fieldDataSettings(Settings.builder().put(FieldDataType.FORMAT_KEY, "paged_bytes").build()).build(ctx).fieldType();
final IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new KeywordAnalyzer()));
Document doc = new Document();
doc.add(new StringField("s", "thisisastring", Store.NO));
@ -150,18 +149,10 @@ public class IndexFieldDataServiceTests extends ElasticsearchSingleNodeTest {
// write new segment
final IndexReader reader2 = DirectoryReader.open(writer, true);
final MappedFieldType mapper2 = MapperBuilders.stringField("s").tokenized(false).fieldDataSettings(Settings.builder().put(FieldDataType.FORMAT_KEY, "fst").build()).build(ctx).fieldType();
final MappedFieldType mapper2 = MapperBuilders.stringField("s").tokenized(false).docValues(true).fieldDataSettings(Settings.builder().put(FieldDataType.FORMAT_KEY, "doc_values").build()).build(ctx).fieldType();
ifd = ifdService.getForField(mapper2);
assertThat(ifd, instanceOf(FSTBytesIndexFieldData.class));
for (LeafReaderContext arc : reader2.leaves()) {
AtomicFieldData afd = ifd.load(arc);
if (oldSegments.contains(arc.reader())) {
assertThat(afd, instanceOf(PagedBytesAtomicFieldData.class));
} else {
assertThat(afd, instanceOf(FSTBytesAtomicFieldData.class));
assertThat(ifd, instanceOf(SortedSetDVOrdinalsIndexFieldData.class));
@ -1,80 +0,0 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.index.mapper.geo;
import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsRequest;
import org.elasticsearch.cluster.metadata.MappingMetaData;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.unit.DistanceUnit;
import org.elasticsearch.common.unit.DistanceUnit.Distance;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import java.util.Map;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
public class GeoMappingTests extends ElasticsearchIntegrationTest {
public void testUpdatePrecision() throws Exception {
assertAcked(prepareCreate("test").addMapping("type1", XContentFactory.jsonBuilder().startObject()
.field("type", "geo_point")
.field("format", "compressed")
.field("precision", "2mm")
assertPrecision(new Distance(2, DistanceUnit.MILLIMETERS));
.field("type", "geo_point")
.field("format", "compressed")
.field("precision", "11m")
assertPrecision(new Distance(11, DistanceUnit.METERS));
private void assertPrecision(Distance expected) throws Exception {
ImmutableOpenMap<String, ImmutableOpenMap<String, MappingMetaData>> mappings = client().admin().indices().getMappings(new GetMappingsRequest().indices("test").types("type1")).actionGet().getMappings();
Map<String, ?> properties = (Map<String, ?>) mappings.get("test").get("type1").getSourceAsMap().get("properties");
Map<String, ?> pinProperties = (Map<String, ?>) properties.get("pin");
Map<String, ?> pinFieldData = (Map<String, ?>) pinProperties.get("fielddata");
Distance precision = Distance.parseDistance(pinFieldData.get("precision").toString());
assertEquals(expected, precision);
@ -462,7 +462,6 @@ public class MultiFieldTests extends ElasticsearchSingleNodeTest {
possibleSettings.put("filter.frequency.min", 1);
possibleSettings.put("filter.frequency.max", 2);
possibleSettings.put("filter.regex.pattern", ".*");
possibleSettings.put("format", "fst");
possibleSettings.put("loading", "eager");
possibleSettings.put("foo", "bar");
possibleSettings.put("zetting", "zValue");
@ -381,7 +381,7 @@ public class SimpleStringMappingTests extends ElasticsearchSingleNodeTest {
.field("type", "string")
.field("format", "fst")
.field("format", "paged_bytes")
@ -12,7 +12,7 @@
"search_analyzer": "whitespace",
"similarity": "my_similarity",
"fielddata": {
"format": "fst"
"format": "paged_bytes"
@ -70,9 +70,6 @@ public class RandomExceptionCircuitBreakerTests extends ElasticsearchIntegration
.field("type", "string")
.field("index", "not_analyzed")
.field("doc_values", randomBoolean())
.field("format", randomBytesFieldDataFormat())
.endObject() // fielddata
.endObject() // test-str
// I don't use randomNumericType() here because I don't want "byte", and I want "float" and "double"
@ -391,7 +391,6 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
.field("match_mapping_type", "string")
.field(FieldDataType.FORMAT_KEY, randomFrom("paged_bytes", "fst"))
.field(Loading.KEY, randomLoadingValues())
@ -1769,14 +1768,6 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
return randomFrom(Arrays.asList("array", "doc_values"));
* Returns a random bytes field data format from the choices of
* "paged_bytes", "fst", or "doc_values".
public static String randomBytesFieldDataFormat() {
return randomFrom(Arrays.asList("paged_bytes", "fst"));
* Returns a random JODA Time Zone based on Java Time Zones
@ -86,13 +86,13 @@ breaker using
The field data format controls how field data should be stored.
Depending on the field type, there might be several field data types
available. In particular, string and numeric types support the `doc_values`
available. In particular, string, geo-point and numeric types support the `doc_values`
format which allows for computing the field data data-structures at indexing
time and storing them on disk. Although it will make the index larger and may
be slightly slower, this implementation will be more near-realtime-friendly
and will require much less memory from the JVM than other implementations.
Here is an example of how to configure the `tag` field to use the `fst` field
Here is an example of how to configure the `tag` field to use the `paged_bytes` field
data format.
@ -101,31 +101,23 @@ data format.
"tag": {
"type": "string",
"fielddata": {
"format": "fst"
"format": "paged_bytes"
It is possible to change the field data format (and the field data settings
in general) on a live index by using the update mapping API. When doing so,
field data which had already been loaded for existing segments will remain
alive while new segments will use the new field data configuration. Thanks to
the background merging process, all segments will eventually use the new
field data format.
in general) on a live index by using the update mapping API.
==== String field data types
`paged_bytes` (default)::
`paged_bytes` (default on analyzed string fields)::
Stores unique terms sequentially in a large buffer and maps documents to
the indices of the terms they contain in this large buffer.
Stores terms in a FST. Slower to build than `paged_bytes` but can help lower
memory usage if many terms share common prefixes and/or suffixes.
`doc_values` (default when index is set to `not_analyzed`)::
Computes and stores field data data-structures on disk at indexing time.
Lowers memory usage but only works on non-analyzed strings (`index`: `no` or
@ -133,19 +125,19 @@ field data format.
==== Numeric field data types
`array` (used when doc values are disabled)::
Stores field values in memory using arrays.
`doc_values` (default unless doc values are disabled)::
Computes and stores field data data-structures on disk at indexing time.
==== Geo point field data types
`array` (used when doc values are disabled)::
Stores latitudes and longitudes in arrays.
`doc_values` (default unless doc values are disabled)::
Computes and stores field data data-structures on disk at indexing time.
Reference in New Issue
Block a user