diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java b/core/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java index 4fed6b6c857..f851420e036 100644 --- a/core/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java +++ b/core/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java @@ -31,7 +31,6 @@ import org.elasticsearch.common.util.concurrent.KeyedLock; import org.elasticsearch.index.AbstractIndexComponent; import org.elasticsearch.index.Index; import org.elasticsearch.index.fielddata.plain.*; -import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.core.BooleanFieldMapper; import org.elasticsearch.index.mapper.internal.IndexFieldMapper; @@ -60,8 +59,6 @@ public class IndexFieldDataService extends AbstractIndexComponent { private static final String DOC_VALUES_FORMAT = "doc_values"; private static final String ARRAY_FORMAT = "array"; private static final String PAGED_BYTES_FORMAT = "paged_bytes"; - private static final String FST_FORMAT = "fst"; - private static final String COMPRESSED_FORMAT = "compressed"; private final static ImmutableMap buildersByType; private final static ImmutableMap docValuesBuildersByType; @@ -99,7 +96,6 @@ public class IndexFieldDataService extends AbstractIndexComponent { buildersByTypeAndFormat = MapBuilder., IndexFieldData.Builder>newMapBuilder() .put(Tuple.tuple("string", PAGED_BYTES_FORMAT), new PagedBytesIndexFieldData.Builder()) - .put(Tuple.tuple("string", FST_FORMAT), new FSTBytesIndexFieldData.Builder()) .put(Tuple.tuple("string", DOC_VALUES_FORMAT), new DocValuesIndexFieldData.Builder()) .put(Tuple.tuple("string", DISABLED_FORMAT), new DisabledIndexFieldData.Builder()) @@ -130,7 +126,6 @@ public class IndexFieldDataService extends AbstractIndexComponent { .put(Tuple.tuple("geo_point", ARRAY_FORMAT), new GeoPointDoubleArrayIndexFieldData.Builder()) .put(Tuple.tuple("geo_point", DOC_VALUES_FORMAT), new GeoPointBinaryDVIndexFieldData.Builder()) .put(Tuple.tuple("geo_point", DISABLED_FORMAT), new DisabledIndexFieldData.Builder()) - .put(Tuple.tuple("geo_point", COMPRESSED_FORMAT), new GeoPointCompressedIndexFieldData.Builder()) .put(Tuple.tuple("binary", DOC_VALUES_FORMAT), new BytesBinaryDVIndexFieldData.Builder()) .put(Tuple.tuple("binary", DISABLED_FORMAT), new DisabledIndexFieldData.Builder()) diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/plain/DisabledIndexFieldData.java b/core/src/main/java/org/elasticsearch/index/fielddata/plain/DisabledIndexFieldData.java index e86ff0b1d9b..e8d37ff476f 100644 --- a/core/src/main/java/org/elasticsearch/index/fielddata/plain/DisabledIndexFieldData.java +++ b/core/src/main/java/org/elasticsearch/index/fielddata/plain/DisabledIndexFieldData.java @@ -24,7 +24,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; -import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MappedFieldType.Names; import org.elasticsearch.index.mapper.MapperService; diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/plain/FSTBytesAtomicFieldData.java b/core/src/main/java/org/elasticsearch/index/fielddata/plain/FSTBytesAtomicFieldData.java deleted file mode 100644 index e617c8005f1..00000000000 --- a/core/src/main/java/org/elasticsearch/index/fielddata/plain/FSTBytesAtomicFieldData.java +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.index.fielddata.plain; - -import org.apache.lucene.index.RandomAccessOrds; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.IntsRefBuilder; -import org.apache.lucene.util.fst.FST; -import org.apache.lucene.util.fst.FST.Arc; -import org.apache.lucene.util.fst.FST.BytesReader; -import org.apache.lucene.util.fst.Util; -import org.elasticsearch.index.fielddata.ordinals.Ordinals; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -/** - */ -public class FSTBytesAtomicFieldData extends AbstractAtomicOrdinalsFieldData { - - // 0 ordinal in values means no value (its null) - protected final Ordinals ordinals; - - private long size = -1; - - private final FST fst; - - public FSTBytesAtomicFieldData(FST fst, Ordinals ordinals) { - this.ordinals = ordinals; - this.fst = fst; - } - - @Override - public void close() { - } - - @Override - public long ramBytesUsed() { - if (size == -1) { - long size = ordinals.ramBytesUsed(); - // FST - size += fst == null ? 0 : fst.ramBytesUsed(); - this.size = size; - } - return size; - } - - @Override - public Collection getChildResources() { - List resources = new ArrayList<>(); - resources.add(Accountables.namedAccountable("ordinals", ordinals)); - if (fst != null) { - resources.add(Accountables.namedAccountable("terms", fst)); - } - return Collections.unmodifiableList(resources); - } - - @Override - public RandomAccessOrds getOrdinalsValues() { - return ordinals.ordinals(new ValuesHolder(fst)); - } - - private static class ValuesHolder implements Ordinals.ValuesHolder { - - private final FST fst; - - // per-thread resources - private final BytesRefBuilder scratch; - protected final BytesReader in; - protected final Arc firstArc = new Arc<>(); - protected final Arc scratchArc = new Arc<>(); - protected final IntsRefBuilder scratchInts = new IntsRefBuilder(); - - ValuesHolder(FST fst) { - this.fst = fst; - scratch = new BytesRefBuilder(); - in = fst.getBytesReader(); - } - - @Override - public BytesRef lookupOrd(long ord) { - assert ord != SortedSetDocValues.NO_MORE_ORDS; - in.setPosition(0); - fst.getFirstArc(firstArc); - try { - IntsRef output = Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts); - scratch.clear(); - scratch.grow(output.length); - Util.toBytesRef(output, scratch); - } catch (IOException ex) { - //bogus - } - return scratch.get(); - } - } - -} diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/plain/FSTBytesIndexFieldData.java b/core/src/main/java/org/elasticsearch/index/fielddata/plain/FSTBytesIndexFieldData.java deleted file mode 100644 index f0d4f1fde9a..00000000000 --- a/core/src/main/java/org/elasticsearch/index/fielddata/plain/FSTBytesIndexFieldData.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.index.fielddata.plain; - -import org.apache.lucene.index.*; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IntsRefBuilder; -import org.apache.lucene.util.fst.FST; -import org.apache.lucene.util.fst.FST.INPUT_TYPE; -import org.apache.lucene.util.fst.PositiveIntOutputs; -import org.apache.lucene.util.fst.Util; -import org.elasticsearch.common.breaker.CircuitBreaker; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.Index; -import org.elasticsearch.index.fielddata.*; -import org.elasticsearch.index.fielddata.ordinals.Ordinals; -import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder; -import org.elasticsearch.index.mapper.FieldMapper; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.MapperService; -import org.elasticsearch.index.settings.IndexSettings; -import org.elasticsearch.indices.breaker.CircuitBreakerService; - -/** - */ -public class FSTBytesIndexFieldData extends AbstractIndexOrdinalsFieldData { - - private final CircuitBreakerService breakerService; - - public static class Builder implements IndexFieldData.Builder { - - @Override - public IndexOrdinalsFieldData build(Index index, @IndexSettings Settings indexSettings, MappedFieldType fieldType, - IndexFieldDataCache cache, CircuitBreakerService breakerService, MapperService mapperService) { - return new FSTBytesIndexFieldData(index, indexSettings, fieldType.names(), fieldType.fieldDataType(), cache, breakerService); - } - } - - FSTBytesIndexFieldData(Index index, @IndexSettings Settings indexSettings, MappedFieldType.Names fieldNames, FieldDataType fieldDataType, - IndexFieldDataCache cache, CircuitBreakerService breakerService) { - super(index, indexSettings, fieldNames, fieldDataType, cache, breakerService); - this.breakerService = breakerService; - } - - @Override - public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception { - LeafReader reader = context.reader(); - - Terms terms = reader.terms(getFieldNames().indexName()); - AtomicOrdinalsFieldData data = null; - // TODO: Use an actual estimator to estimate before loading. - NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker(CircuitBreaker.FIELDDATA)); - if (terms == null) { - data = AbstractAtomicOrdinalsFieldData.empty(); - estimator.afterLoad(null, data.ramBytesUsed()); - return data; - } - PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); - org.apache.lucene.util.fst.Builder fstBuilder = new org.apache.lucene.util.fst.Builder<>(INPUT_TYPE.BYTE1, outputs); - final IntsRefBuilder scratch = new IntsRefBuilder(); - - final long numTerms; - if (regex == null && frequency == null) { - numTerms = terms.size(); - } else { - numTerms = -1; - } - final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); - boolean success = false; - try (OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio)) { - - // we don't store an ord 0 in the FST since we could have an empty string in there and FST don't support - // empty strings twice. ie. them merge fails for long output. - TermsEnum termsEnum = filter(terms, reader); - PostingsEnum docsEnum = null; - for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { - final long termOrd = builder.nextOrdinal(); - fstBuilder.add(Util.toIntsRef(term, scratch), (long) termOrd); - docsEnum = termsEnum.postings(null, docsEnum, PostingsEnum.NONE); - for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { - builder.addDoc(docId); - } - } - - FST fst = fstBuilder.finish(); - - final Ordinals ordinals = builder.build(fieldDataType.getSettings()); - - data = new FSTBytesAtomicFieldData(fst, ordinals); - success = true; - return data; - } finally { - if (success) { - estimator.afterLoad(null, data.ramBytesUsed()); - } - - } - } -} diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointCompressedAtomicFieldData.java b/core/src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointCompressedAtomicFieldData.java deleted file mode 100644 index 2a5621b58af..00000000000 --- a/core/src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointCompressedAtomicFieldData.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.index.fielddata.plain; - -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.RandomAccessOrds; -import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; -import org.apache.lucene.util.BitSet; -import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.packed.PagedMutable; -import org.elasticsearch.common.geo.GeoPoint; -import org.elasticsearch.index.fielddata.FieldData; -import org.elasticsearch.index.fielddata.GeoPointValues; -import org.elasticsearch.index.fielddata.MultiGeoPointValues; -import org.elasticsearch.index.fielddata.ordinals.Ordinals; -import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -/** - * Field data atomic impl for geo points with lossy compression. - */ -public abstract class GeoPointCompressedAtomicFieldData extends AbstractAtomicGeoPointFieldData { - - @Override - public void close() { - } - - static class WithOrdinals extends GeoPointCompressedAtomicFieldData { - - private final GeoPointFieldMapper.Encoding encoding; - private final PagedMutable lon, lat; - private final Ordinals ordinals; - private final int maxDoc; - - public WithOrdinals(GeoPointFieldMapper.Encoding encoding, PagedMutable lon, PagedMutable lat, Ordinals ordinals, int maxDoc) { - super(); - this.encoding = encoding; - this.lon = lon; - this.lat = lat; - this.ordinals = ordinals; - this.maxDoc = maxDoc; - } - - @Override - public long ramBytesUsed() { - return RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.ramBytesUsed() + lat.ramBytesUsed(); - } - - @Override - public Collection getChildResources() { - List resources = new ArrayList<>(); - resources.add(Accountables.namedAccountable("latitude", lat)); - resources.add(Accountables.namedAccountable("longitude", lon)); - return Collections.unmodifiableList(resources); - } - - @Override - public MultiGeoPointValues getGeoPointValues() { - final RandomAccessOrds ords = ordinals.ordinals(); - final SortedDocValues singleOrds = DocValues.unwrapSingleton(ords); - if (singleOrds != null) { - final GeoPoint point = new GeoPoint(); - final GeoPointValues values = new GeoPointValues() { - @Override - public GeoPoint get(int docID) { - final int ord = singleOrds.getOrd(docID); - if (ord >= 0) { - encoding.decode(lat.get(ord), lon.get(ord), point); - } else { - point.reset(0, 0); - } - return point; - } - }; - return FieldData.singleton(values, DocValues.docsWithValue(singleOrds, maxDoc)); - } else { - final GeoPoint point = new GeoPoint(); - return new MultiGeoPointValues() { - - @Override - public GeoPoint valueAt(int index) { - final long ord = ords.ordAt(index); - encoding.decode(lat.get(ord), lon.get(ord), point); - return point; - } - - @Override - public void setDocument(int docId) { - ords.setDocument(docId); - } - - @Override - public int count() { - return ords.cardinality(); - } - }; - } - } - } - - /** - * Assumes unset values are marked in bitset, and docId is used as the index to the value array. - */ - public static class Single extends GeoPointCompressedAtomicFieldData { - - private final GeoPointFieldMapper.Encoding encoding; - private final PagedMutable lon, lat; - private final BitSet set; - - public Single(GeoPointFieldMapper.Encoding encoding, PagedMutable lon, PagedMutable lat, BitSet set) { - super(); - this.encoding = encoding; - this.lon = lon; - this.lat = lat; - this.set = set; - } - - @Override - public long ramBytesUsed() { - return RamUsageEstimator.NUM_BYTES_INT/*size*/ + lon.ramBytesUsed() + lat.ramBytesUsed() + (set == null ? 0 : set.ramBytesUsed()); - } - - @Override - public Collection getChildResources() { - List resources = new ArrayList<>(); - resources.add(Accountables.namedAccountable("latitude", lat)); - resources.add(Accountables.namedAccountable("longitude", lon)); - if (set != null) { - resources.add(Accountables.namedAccountable("missing bitset", set)); - } - return Collections.unmodifiableList(resources); - } - - @Override - public MultiGeoPointValues getGeoPointValues() { - final GeoPoint point = new GeoPoint(); - final GeoPointValues values = new GeoPointValues() { - @Override - public GeoPoint get(int docID) { - encoding.decode(lat.get(docID), lon.get(docID), point); - return point; - } - }; - return FieldData.singleton(values, set); - } - } -} \ No newline at end of file diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointCompressedIndexFieldData.java b/core/src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointCompressedIndexFieldData.java deleted file mode 100644 index 2d38c714464..00000000000 --- a/core/src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointCompressedIndexFieldData.java +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.elasticsearch.index.fielddata.plain; - -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.RandomAccessOrds; -import org.apache.lucene.index.Terms; -import org.apache.lucene.util.BitSet; -import org.apache.lucene.util.packed.PackedInts; -import org.apache.lucene.util.packed.PagedMutable; -import org.elasticsearch.common.breaker.CircuitBreaker; -import org.elasticsearch.common.geo.GeoPoint; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.unit.DistanceUnit; -import org.elasticsearch.common.unit.DistanceUnit.Distance; -import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.index.Index; -import org.elasticsearch.index.fielddata.*; -import org.elasticsearch.index.fielddata.ordinals.Ordinals; -import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder; -import org.elasticsearch.index.mapper.FieldMapper; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.MapperService; -import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper; -import org.elasticsearch.index.settings.IndexSettings; -import org.elasticsearch.indices.breaker.CircuitBreakerService; - -/** - */ -public class GeoPointCompressedIndexFieldData extends AbstractIndexGeoPointFieldData { - - private static final String PRECISION_KEY = "precision"; - private static final Distance DEFAULT_PRECISION_VALUE = new Distance(1, DistanceUnit.CENTIMETERS); - private final CircuitBreakerService breakerService; - - public static class Builder implements IndexFieldData.Builder { - - @Override - public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache, - CircuitBreakerService breakerService, MapperService mapperService) { - FieldDataType type = fieldType.fieldDataType(); - final String precisionAsString = type.getSettings().get(PRECISION_KEY); - final Distance precision; - if (precisionAsString != null) { - precision = Distance.parseDistance(precisionAsString); - } else { - precision = DEFAULT_PRECISION_VALUE; - } - return new GeoPointCompressedIndexFieldData(index, indexSettings, fieldType.names(), fieldType.fieldDataType(), cache, precision, breakerService); - } - } - - private final GeoPointFieldMapper.Encoding encoding; - - public GeoPointCompressedIndexFieldData(Index index, @IndexSettings Settings indexSettings, MappedFieldType.Names fieldNames, - FieldDataType fieldDataType, IndexFieldDataCache cache, Distance precision, - CircuitBreakerService breakerService) { - super(index, indexSettings, fieldNames, fieldDataType, cache); - this.encoding = GeoPointFieldMapper.Encoding.of(precision); - this.breakerService = breakerService; - } - - @Override - public AtomicGeoPointFieldData loadDirect(LeafReaderContext context) throws Exception { - LeafReader reader = context.reader(); - - Terms terms = reader.terms(getFieldNames().indexName()); - AtomicGeoPointFieldData data = null; - // TODO: Use an actual estimator to estimate before loading. - NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker(CircuitBreaker.FIELDDATA)); - if (terms == null) { - data = AbstractAtomicGeoPointFieldData.empty(reader.maxDoc()); - estimator.afterLoad(null, data.ramBytesUsed()); - return data; - } - final long initialSize; - if (terms.size() >= 0) { - initialSize = 1 + terms.size(); - } else { // codec doesn't expose size - initialSize = 1 + Math.min(1 << 12, reader.maxDoc()); - } - final int pageSize = Integer.highestOneBit(BigArrays.PAGE_SIZE_IN_BYTES * 8 / encoding.numBitsPerCoordinate() - 1) << 1; - PagedMutable lat = new PagedMutable(initialSize, pageSize, encoding.numBitsPerCoordinate(), PackedInts.COMPACT); - PagedMutable lon = new PagedMutable(initialSize, pageSize, encoding.numBitsPerCoordinate(), PackedInts.COMPACT); - final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); - boolean success = false; - try (OrdinalsBuilder builder = new OrdinalsBuilder(terms.size(), reader.maxDoc(), acceptableTransientOverheadRatio)) { - final GeoPointEnum iter = new GeoPointEnum(builder.buildFromTerms(terms.iterator())); - GeoPoint point; - while ((point = iter.next()) != null) { - final long ord = builder.currentOrdinal(); - if (lat.size() <= ord) { - final long newSize = BigArrays.overSize(ord + 1); - lat = lat.resize(newSize); - lon = lon.resize(newSize); - } - lat.set(ord, encoding.encodeCoordinate(point.getLat())); - lon.set(ord, encoding.encodeCoordinate(point.getLon())); - } - - Ordinals build = builder.build(fieldDataType.getSettings()); - RandomAccessOrds ordinals = build.ordinals(); - if (FieldData.isMultiValued(ordinals) || CommonSettings.getMemoryStorageHint(fieldDataType) == CommonSettings.MemoryStorageFormat.ORDINALS) { - if (lat.size() != ordinals.getValueCount()) { - lat = lat.resize(ordinals.getValueCount()); - lon = lon.resize(ordinals.getValueCount()); - } - data = new GeoPointCompressedAtomicFieldData.WithOrdinals(encoding, lon, lat, build, reader.maxDoc()); - } else { - int maxDoc = reader.maxDoc(); - PagedMutable sLat = new PagedMutable(reader.maxDoc(), pageSize, encoding.numBitsPerCoordinate(), PackedInts.COMPACT); - PagedMutable sLon = new PagedMutable(reader.maxDoc(), pageSize, encoding.numBitsPerCoordinate(), PackedInts.COMPACT); - final long missing = encoding.encodeCoordinate(0); - for (int i = 0; i < maxDoc; i++) { - ordinals.setDocument(i); - final long nativeOrdinal = ordinals.nextOrd(); - if (nativeOrdinal >= 0) { - sLat.set(i, lat.get(nativeOrdinal)); - sLon.set(i, lon.get(nativeOrdinal)); - } else { - sLat.set(i, missing); - sLon.set(i, missing); - } - } - BitSet set = builder.buildDocsWithValuesSet(); - data = new GeoPointCompressedAtomicFieldData.Single(encoding, sLon, sLat, set); - } - success = true; - return data; - } finally { - if (success) { - estimator.afterLoad(null, data.ramBytesUsed()); - } - - } - - } - - -} \ No newline at end of file diff --git a/core/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java b/core/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java index 1b3168af985..2fe127dd796 100644 --- a/core/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java +++ b/core/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java @@ -87,7 +87,6 @@ public class DuelFieldDataTests extends AbstractFieldDataTests { } LeafReaderContext context = refreshReader(); Map typeMap = new HashMap<>(); - typeMap.put(new FieldDataType("string", Settings.builder().put("format", "fst")), Type.Bytes); typeMap.put(new FieldDataType("string", Settings.builder().put("format", "paged_bytes")), Type.Bytes); typeMap.put(new FieldDataType("byte", Settings.builder().put("format", "array")), Type.Integer); typeMap.put(new FieldDataType("short", Settings.builder().put("format", "array")), Type.Integer); @@ -325,7 +324,6 @@ public class DuelFieldDataTests extends AbstractFieldDataTests { } LeafReaderContext context = refreshReader(); Map typeMap = new HashMap<>(); - typeMap.put(new FieldDataType("string", Settings.builder().put("format", "fst")), Type.Bytes); typeMap.put(new FieldDataType("string", Settings.builder().put("format", "paged_bytes")), Type.Bytes); typeMap.put(new FieldDataType("string", Settings.builder().put("format", "doc_values")), Type.Bytes); // TODO add filters @@ -384,7 +382,6 @@ public class DuelFieldDataTests extends AbstractFieldDataTests { refreshReader(); Map typeMap = new HashMap(); - typeMap.put(new FieldDataType("string", Settings.builder().put("format", "fst")), Type.Bytes); typeMap.put(new FieldDataType("string", Settings.builder().put("format", "paged_bytes")), Type.Bytes); typeMap.put(new FieldDataType("string", Settings.builder().put("format", "doc_values")), Type.Bytes); @@ -437,7 +434,6 @@ public class DuelFieldDataTests extends AbstractFieldDataTests { Map typeMap = new HashMap<>(); final Distance precision = new Distance(1, randomFrom(DistanceUnit.values())); typeMap.put(new FieldDataType("geo_point", Settings.builder().put("format", "array")), Type.GeoPoint); - typeMap.put(new FieldDataType("geo_point", Settings.builder().put("format", "compressed").put("precision", precision)), Type.GeoPoint); typeMap.put(new FieldDataType("geo_point", Settings.builder().put("format", "doc_values")), Type.GeoPoint); ArrayList> list = new ArrayList<>(typeMap.entrySet()); diff --git a/core/src/test/java/org/elasticsearch/index/fielddata/FSTPackedBytesStringFieldDataTests.java b/core/src/test/java/org/elasticsearch/index/fielddata/FSTPackedBytesStringFieldDataTests.java deleted file mode 100644 index a64ab8d98bd..00000000000 --- a/core/src/test/java/org/elasticsearch/index/fielddata/FSTPackedBytesStringFieldDataTests.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.fielddata; - -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder; - -/** - */ -public class FSTPackedBytesStringFieldDataTests extends AbstractStringFieldDataTests { - - @Override - protected FieldDataType getFieldDataType() { - return new FieldDataType("string", Settings.builder().put("format", "fst").put(OrdinalsBuilder.FORCE_MULTI_ORDINALS, randomBoolean())); - } -} diff --git a/core/src/test/java/org/elasticsearch/index/fielddata/FilterFieldDataTest.java b/core/src/test/java/org/elasticsearch/index/fielddata/FilterFieldDataTest.java index 3c13999eb37..e4b28223389 100644 --- a/core/src/test/java/org/elasticsearch/index/fielddata/FilterFieldDataTest.java +++ b/core/src/test/java/org/elasticsearch/index/fielddata/FilterFieldDataTest.java @@ -60,7 +60,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { } writer.forceMerge(1, true); LeafReaderContext context = refreshReader(); - String[] formats = new String[] { "fst", "paged_bytes"}; + String[] formats = new String[] { "paged_bytes"}; for (String format : formats) { { @@ -153,7 +153,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { logger.debug(hundred + " " + ten + " " + five); writer.forceMerge(1, true); LeafReaderContext context = refreshReader(); - String[] formats = new String[] { "fst", "paged_bytes"}; + String[] formats = new String[] { "paged_bytes"}; for (String format : formats) { { ifdService.clear(); diff --git a/core/src/test/java/org/elasticsearch/index/fielddata/IndexFieldDataServiceTests.java b/core/src/test/java/org/elasticsearch/index/fielddata/IndexFieldDataServiceTests.java index abccb4851c9..3af20da91ed 100644 --- a/core/src/test/java/org/elasticsearch/index/fielddata/IndexFieldDataServiceTests.java +++ b/core/src/test/java/org/elasticsearch/index/fielddata/IndexFieldDataServiceTests.java @@ -28,7 +28,6 @@ import org.apache.lucene.store.RAMDirectory; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.fielddata.plain.*; import org.elasticsearch.index.mapper.ContentPath; -import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.Mapper.BuilderContext; import org.elasticsearch.index.mapper.MapperBuilders; @@ -101,10 +100,10 @@ public class IndexFieldDataServiceTests extends ElasticsearchSingleNodeTest { final IndexService indexService = createIndex("test"); final IndexFieldDataService ifdService = indexService.fieldData(); final BuilderContext ctx = new BuilderContext(indexService.settingsService().getSettings(), new ContentPath(1)); - final MappedFieldType stringMapper = MapperBuilders.stringField("string").tokenized(false).fieldDataSettings(DOC_VALUES_SETTINGS).fieldDataSettings(Settings.builder().put("format", "fst").build()).build(ctx).fieldType(); + final MappedFieldType stringMapper = MapperBuilders.stringField("string").tokenized(false).fieldDataSettings(DOC_VALUES_SETTINGS).fieldDataSettings(Settings.builder().put("format", "disabled").build()).build(ctx).fieldType(); ifdService.clear(); IndexFieldData fd = ifdService.getForField(stringMapper); - assertTrue(fd instanceof FSTBytesIndexFieldData); + assertTrue(fd instanceof DisabledIndexFieldData); final Settings fdSettings = Settings.builder().put("format", "array").build(); for (MappedFieldType mapper : Arrays.asList( @@ -133,7 +132,7 @@ public class IndexFieldDataServiceTests extends ElasticsearchSingleNodeTest { final IndexService indexService = createIndex("test"); final IndexFieldDataService ifdService = indexService.fieldData(); final BuilderContext ctx = new BuilderContext(indexService.settingsService().getSettings(), new ContentPath(1)); - final MappedFieldType mapper1 = MapperBuilders.stringField("s").tokenized(false).fieldDataSettings(Settings.builder().put(FieldDataType.FORMAT_KEY, "paged_bytes").build()).build(ctx).fieldType(); + final MappedFieldType mapper1 = MapperBuilders.stringField("s").tokenized(false).docValues(true).fieldDataSettings(Settings.builder().put(FieldDataType.FORMAT_KEY, "paged_bytes").build()).build(ctx).fieldType(); final IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(new KeywordAnalyzer())); Document doc = new Document(); doc.add(new StringField("s", "thisisastring", Store.NO)); @@ -150,18 +149,10 @@ public class IndexFieldDataServiceTests extends ElasticsearchSingleNodeTest { // write new segment writer.addDocument(doc); final IndexReader reader2 = DirectoryReader.open(writer, true); - final MappedFieldType mapper2 = MapperBuilders.stringField("s").tokenized(false).fieldDataSettings(Settings.builder().put(FieldDataType.FORMAT_KEY, "fst").build()).build(ctx).fieldType(); + final MappedFieldType mapper2 = MapperBuilders.stringField("s").tokenized(false).docValues(true).fieldDataSettings(Settings.builder().put(FieldDataType.FORMAT_KEY, "doc_values").build()).build(ctx).fieldType(); ifdService.onMappingUpdate(); ifd = ifdService.getForField(mapper2); - assertThat(ifd, instanceOf(FSTBytesIndexFieldData.class)); - for (LeafReaderContext arc : reader2.leaves()) { - AtomicFieldData afd = ifd.load(arc); - if (oldSegments.contains(arc.reader())) { - assertThat(afd, instanceOf(PagedBytesAtomicFieldData.class)); - } else { - assertThat(afd, instanceOf(FSTBytesAtomicFieldData.class)); - } - } + assertThat(ifd, instanceOf(SortedSetDVOrdinalsIndexFieldData.class)); reader1.close(); reader2.close(); writer.close(); diff --git a/core/src/test/java/org/elasticsearch/index/mapper/geo/GeoMappingTests.java b/core/src/test/java/org/elasticsearch/index/mapper/geo/GeoMappingTests.java deleted file mode 100644 index 0c292d9779d..00000000000 --- a/core/src/test/java/org/elasticsearch/index/mapper/geo/GeoMappingTests.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.mapper.geo; - -import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsRequest; -import org.elasticsearch.cluster.metadata.MappingMetaData; -import org.elasticsearch.common.collect.ImmutableOpenMap; -import org.elasticsearch.common.unit.DistanceUnit; -import org.elasticsearch.common.unit.DistanceUnit.Distance; -import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.test.ElasticsearchIntegrationTest; - -import java.util.Map; - -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; - -public class GeoMappingTests extends ElasticsearchIntegrationTest { - - public void testUpdatePrecision() throws Exception { - assertAcked(prepareCreate("test").addMapping("type1", XContentFactory.jsonBuilder().startObject() - .startObject("type1") - .startObject("properties") - .startObject("pin") - .field("type", "geo_point") - .startObject("fielddata") - .field("format", "compressed") - .field("precision", "2mm") - .endObject() - .endObject() - .endObject() - .endObject() - .endObject()).get()); - ensureYellow(); - assertPrecision(new Distance(2, DistanceUnit.MILLIMETERS)); - - assertAcked(client().admin().indices().preparePutMapping("test").setType("type1").setSource(XContentFactory.jsonBuilder().startObject() - .startObject("type1") - .startObject("properties") - .startObject("pin") - .field("type", "geo_point") - .startObject("fielddata") - .field("format", "compressed") - .field("precision", "11m") - .endObject() - .endObject() - .endObject() - .endObject() - .endObject()).get()); - - assertPrecision(new Distance(11, DistanceUnit.METERS)); - } - - private void assertPrecision(Distance expected) throws Exception { - ImmutableOpenMap> mappings = client().admin().indices().getMappings(new GetMappingsRequest().indices("test").types("type1")).actionGet().getMappings(); - assertNotNull(mappings); - Map properties = (Map) mappings.get("test").get("type1").getSourceAsMap().get("properties"); - Map pinProperties = (Map) properties.get("pin"); - Map pinFieldData = (Map) pinProperties.get("fielddata"); - Distance precision = Distance.parseDistance(pinFieldData.get("precision").toString()); - assertEquals(expected, precision); - } - -} diff --git a/core/src/test/java/org/elasticsearch/index/mapper/multifield/MultiFieldTests.java b/core/src/test/java/org/elasticsearch/index/mapper/multifield/MultiFieldTests.java index 2d68f801d27..d63a15b6afb 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/multifield/MultiFieldTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/multifield/MultiFieldTests.java @@ -462,7 +462,6 @@ public class MultiFieldTests extends ElasticsearchSingleNodeTest { possibleSettings.put("filter.frequency.min", 1); possibleSettings.put("filter.frequency.max", 2); possibleSettings.put("filter.regex.pattern", ".*"); - possibleSettings.put("format", "fst"); possibleSettings.put("loading", "eager"); possibleSettings.put("foo", "bar"); possibleSettings.put("zetting", "zValue"); diff --git a/core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java b/core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java index 0583e289994..4c142e21239 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/string/SimpleStringMappingTests.java @@ -381,7 +381,7 @@ public class SimpleStringMappingTests extends ElasticsearchSingleNodeTest { .startObject("str1") .field("type", "string") .startObject("fielddata") - .field("format", "fst") + .field("format", "paged_bytes") .endObject() .endObject() .startObject("str2") diff --git a/core/src/test/java/org/elasticsearch/index/mapper/update/all_mapping_create_index.json b/core/src/test/java/org/elasticsearch/index/mapper/update/all_mapping_create_index.json index 2b9c42d50b2..e9604ae458f 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/update/all_mapping_create_index.json +++ b/core/src/test/java/org/elasticsearch/index/mapper/update/all_mapping_create_index.json @@ -12,7 +12,7 @@ "search_analyzer": "whitespace", "similarity": "my_similarity", "fielddata": { - "format": "fst" + "format": "paged_bytes" } } } diff --git a/core/src/test/java/org/elasticsearch/indices/memory/breaker/RandomExceptionCircuitBreakerTests.java b/core/src/test/java/org/elasticsearch/indices/memory/breaker/RandomExceptionCircuitBreakerTests.java index c2da93b9ab3..7b2a12f8f98 100644 --- a/core/src/test/java/org/elasticsearch/indices/memory/breaker/RandomExceptionCircuitBreakerTests.java +++ b/core/src/test/java/org/elasticsearch/indices/memory/breaker/RandomExceptionCircuitBreakerTests.java @@ -70,9 +70,6 @@ public class RandomExceptionCircuitBreakerTests extends ElasticsearchIntegration .field("type", "string") .field("index", "not_analyzed") .field("doc_values", randomBoolean()) - .startObject("fielddata") - .field("format", randomBytesFieldDataFormat()) - .endObject() // fielddata .endObject() // test-str .startObject("test-num") // I don't use randomNumericType() here because I don't want "byte", and I want "float" and "double" diff --git a/core/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java b/core/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java index 174f2a97a12..5f0225f4f77 100644 --- a/core/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java +++ b/core/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java @@ -391,7 +391,6 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase .field("match_mapping_type", "string") .startObject("mapping") .startObject("fielddata") - .field(FieldDataType.FORMAT_KEY, randomFrom("paged_bytes", "fst")) .field(Loading.KEY, randomLoadingValues()) .endObject() .endObject() @@ -1769,14 +1768,6 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase return randomFrom(Arrays.asList("array", "doc_values")); } - /** - * Returns a random bytes field data format from the choices of - * "paged_bytes", "fst", or "doc_values". - */ - public static String randomBytesFieldDataFormat() { - return randomFrom(Arrays.asList("paged_bytes", "fst")); - } - /** * Returns a random JODA Time Zone based on Java Time Zones */ diff --git a/docs/reference/index-modules/fielddata.asciidoc b/docs/reference/index-modules/fielddata.asciidoc index 5698af6d4da..b54c45a04e0 100644 --- a/docs/reference/index-modules/fielddata.asciidoc +++ b/docs/reference/index-modules/fielddata.asciidoc @@ -86,13 +86,13 @@ breaker using The field data format controls how field data should be stored. Depending on the field type, there might be several field data types -available. In particular, string and numeric types support the `doc_values` +available. In particular, string, geo-point and numeric types support the `doc_values` format which allows for computing the field data data-structures at indexing time and storing them on disk. Although it will make the index larger and may be slightly slower, this implementation will be more near-realtime-friendly and will require much less memory from the JVM than other implementations. -Here is an example of how to configure the `tag` field to use the `fst` field +Here is an example of how to configure the `tag` field to use the `paged_bytes` field data format. [source,js] @@ -101,31 +101,23 @@ data format. "tag": { "type": "string", "fielddata": { - "format": "fst" + "format": "paged_bytes" } } } -------------------------------------------------- It is possible to change the field data format (and the field data settings -in general) on a live index by using the update mapping API. When doing so, -field data which had already been loaded for existing segments will remain -alive while new segments will use the new field data configuration. Thanks to -the background merging process, all segments will eventually use the new -field data format. +in general) on a live index by using the update mapping API. [float] ==== String field data types -`paged_bytes` (default):: +`paged_bytes` (default on analyzed string fields):: Stores unique terms sequentially in a large buffer and maps documents to the indices of the terms they contain in this large buffer. -`fst`:: - Stores terms in a FST. Slower to build than `paged_bytes` but can help lower - memory usage if many terms share common prefixes and/or suffixes. - -`doc_values`:: +`doc_values` (default when index is set to `not_analyzed`):: Computes and stores field data data-structures on disk at indexing time. Lowers memory usage but only works on non-analyzed strings (`index`: `no` or `not_analyzed`). @@ -133,19 +125,19 @@ field data format. [float] ==== Numeric field data types -`array` (default):: +`array`:: Stores field values in memory using arrays. -`doc_values`:: +`doc_values` (default unless doc values are disabled):: Computes and stores field data data-structures on disk at indexing time. [float] ==== Geo point field data types -`array` (default):: +`array`:: Stores latitudes and longitudes in arrays. -`doc_values`:: +`doc_values` (default unless doc values are disabled):: Computes and stores field data data-structures on disk at indexing time. [float]