diff --git a/docs/reference/index-modules/fielddata.asciidoc b/docs/reference/index-modules/fielddata.asciidoc index 5053669c94d..2ac2b4f6340 100644 --- a/docs/reference/index-modules/fielddata.asciidoc +++ b/docs/reference/index-modules/fielddata.asciidoc @@ -89,6 +89,9 @@ field data format. `array` (default):: Stores latitudes and longitudes in arrays. +`doc_values`:: + Computes and stores field data data-structures on disk at indexing time. + [float] === Fielddata loading diff --git a/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java b/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java index 9c68546cad7..091dc8d4efb 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java +++ b/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java @@ -75,6 +75,7 @@ public class IndexFieldDataService extends AbstractIndexComponent { .put("short", new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.SHORT)) .put("int", new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.INT)) .put("long", new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.LONG)) + .put("geo_point", new GeoPointBinaryDVIndexFieldData.Builder()) .immutableMap(); buildersByTypeAndFormat = MapBuilder., IndexFieldData.Builder>newMapBuilder() @@ -108,6 +109,7 @@ public class IndexFieldDataService extends AbstractIndexComponent { .put(Tuple.tuple("long", DISABLED_FORMAT), new DisabledIndexFieldData.Builder()) .put(Tuple.tuple("geo_point", ARRAY_FORMAT), new GeoPointDoubleArrayIndexFieldData.Builder()) + .put(Tuple.tuple("geo_point", DOC_VALUES_FORMAT), new GeoPointBinaryDVIndexFieldData.Builder()) .put(Tuple.tuple("geo_point", DISABLED_FORMAT), new DisabledIndexFieldData.Builder()) .put(Tuple.tuple("geo_point", COMPRESSED_FORMAT), new GeoPointCompressedIndexFieldData.Builder()) diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointBinaryDVAtomicFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointBinaryDVAtomicFieldData.java new file mode 100644 index 00000000000..468cb0d80de --- /dev/null +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointBinaryDVAtomicFieldData.java @@ -0,0 +1,105 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.fielddata.plain; + +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.geo.GeoPoint; +import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.index.fielddata.AtomicGeoPointFieldData; +import org.elasticsearch.index.fielddata.GeoPointValues; +import org.elasticsearch.index.fielddata.ScriptDocValues; + +final class GeoPointBinaryDVAtomicFieldData extends AtomicGeoPointFieldData { + + private final AtomicReader reader; + private final BinaryDocValues values; + + GeoPointBinaryDVAtomicFieldData(AtomicReader reader, BinaryDocValues values) { + super(); + this.reader = reader; + this.values = values == null ? BinaryDocValues.EMPTY : values; + } + + @Override + public boolean isMultiValued() { + return false; + } + + @Override + public boolean isValuesOrdered() { + return false; + } + + @Override + public int getNumDocs() { + return reader.maxDoc(); + } + + @Override + public long getNumberUniqueValues() { + return Long.MAX_VALUE; + } + + @Override + public long getMemorySizeInBytes() { + return -1; // not exposed by Lucene + } + + @Override + public ScriptDocValues getScriptValues() { + return new ScriptDocValues.GeoPoints(getGeoPointValues()); + } + + @Override + public void close() { + // no-op + } + + @Override + public GeoPointValues getGeoPointValues() { + return new GeoPointValues(true) { + + final BytesRef bytes = new BytesRef(); + int i = Integer.MAX_VALUE; + int valueCount = 0; + final GeoPoint point = new GeoPoint(); + + @Override + public int setDocument(int docId) { + values.get(docId, bytes); + assert bytes.length % 16 == 0; + i = 0; + return valueCount = (bytes.length >>> 4); + } + + @Override + public GeoPoint nextValue() { + assert i < 2 * valueCount; + final double lat = ByteUtils.readDoubleLE(bytes.bytes, bytes.offset + i++ * 8); + final double lon = ByteUtils.readDoubleLE(bytes.bytes, bytes.offset + i++ * 8); + return point.reset(lat, lon); + } + + }; + } + +} diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointBinaryDVIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointBinaryDVIndexFieldData.java new file mode 100644 index 00000000000..0d580749750 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointBinaryDVIndexFieldData.java @@ -0,0 +1,74 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.fielddata.plain; + +import org.apache.lucene.index.AtomicReaderContext; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.ElasticSearchIllegalStateException; +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.fielddata.*; +import org.elasticsearch.index.fielddata.fieldcomparator.SortMode; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.FieldMapper.Names; + +import java.io.IOException; + +public class GeoPointBinaryDVIndexFieldData extends DocValuesIndexFieldData implements IndexGeoPointFieldData> { + + public GeoPointBinaryDVIndexFieldData(Index index, Names fieldNames) { + super(index, fieldNames); + } + + @Override + public boolean valuesOrdered() { + return false; + } + + @Override + public final XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) { + throw new ElasticSearchIllegalArgumentException("can't sort on geo_point field without using specific sorting feature, like geo_distance"); + } + + @Override + public AtomicGeoPointFieldData load(AtomicReaderContext context) { + try { + return new GeoPointBinaryDVAtomicFieldData(context.reader(), context.reader().getBinaryDocValues(fieldNames.indexName())); + } catch (IOException e) { + throw new ElasticSearchIllegalStateException("Cannot load doc values", e); + } + } + + @Override + public AtomicGeoPointFieldData loadDirect(AtomicReaderContext context) throws Exception { + return load(context); + } + + public static class Builder implements IndexFieldData.Builder { + + @Override + public IndexFieldData build(Index index, Settings indexSettings, FieldMapper mapper, IndexFieldDataCache cache) { + final FieldMapper.Names fieldNames = mapper.names(); + return new GeoPointBinaryDVIndexFieldData(index, fieldNames); + } + + } +} diff --git a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java index 2254ca02579..67d9c7e034d 100644 --- a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java @@ -19,9 +19,13 @@ package org.elasticsearch.index.mapper.geo; +import com.carrotsearch.hppc.ObjectOpenHashSet; +import com.carrotsearch.hppc.cursors.ObjectCursor; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.common.Nullable; @@ -32,6 +36,7 @@ import org.elasticsearch.common.geo.GeoPoint; import org.elasticsearch.common.geo.GeoUtils; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.DistanceUnit; +import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.support.XContentMapValues; @@ -40,14 +45,13 @@ import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider; import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider; import org.elasticsearch.index.fielddata.FieldDataType; import org.elasticsearch.index.mapper.*; -import org.elasticsearch.index.mapper.core.AbstractFieldMapper; -import org.elasticsearch.index.mapper.core.DoubleFieldMapper; -import org.elasticsearch.index.mapper.core.NumberFieldMapper; -import org.elasticsearch.index.mapper.core.StringFieldMapper; +import org.elasticsearch.index.mapper.core.*; +import org.elasticsearch.index.mapper.core.NumberFieldMapper.CustomNumericDocValuesField; import org.elasticsearch.index.mapper.object.ArrayValueMapperParser; import org.elasticsearch.index.similarity.SimilarityProvider; import java.io.IOException; +import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; @@ -413,10 +417,6 @@ public class GeoPointFieldMapper extends AbstractFieldMapper implement this.normalizeLat = normalizeLat; this.normalizeLon = normalizeLon; - - if (hasDocValues()) { - throw new ElasticSearchIllegalStateException("Geo points don't support doc values"); // yet - } } @Override @@ -571,42 +571,15 @@ public class GeoPointFieldMapper extends AbstractFieldMapper implement } private void parseLatLon(ParseContext context, double lat, double lon) throws IOException { - if (normalizeLat || normalizeLon) { - GeoPoint point = new GeoPoint(lat, lon); - GeoUtils.normalizePoint(point, normalizeLat, normalizeLon); - lat = point.lat(); - lon = point.lon(); - } - - if (validateLat) { - if (lat > 90.0 || lat < -90.0) { - throw new ElasticSearchIllegalArgumentException("illegal latitude value [" + lat + "] for " + name()); - } - } - if (validateLon) { - if (lon > 180.0 || lon < -180) { - throw new ElasticSearchIllegalArgumentException("illegal longitude value [" + lon + "] for " + name()); - } - } - - if (fieldType.indexed() || fieldType.stored()) { - Field field = new Field(names.indexName(), Double.toString(lat) + ',' + Double.toString(lon), fieldType); - context.doc().add(field); - } - if (enableGeoHash) { - parseGeohashField(context, GeoHashUtils.encode(lat, lon, geoHashPrecision)); - } - if (enableLatLon) { - context.externalValue(lat); - latMapper.parse(context); - context.externalValue(lon); - lonMapper.parse(context); - } + parse(context, new GeoPoint(lat, lon), null); } private void parseGeohash(ParseContext context, String geohash) throws IOException { GeoPoint point = GeoHashUtils.decode(geohash); + parse(context, point, geohash); + } + private void parse(ParseContext context, GeoPoint point, String geohash) throws IOException { if (normalizeLat || normalizeLon) { GeoUtils.normalizePoint(point, normalizeLat, normalizeLon); } @@ -627,6 +600,9 @@ public class GeoPointFieldMapper extends AbstractFieldMapper implement context.doc().add(field); } if (enableGeoHash) { + if (geohash == null) { + geohash = GeoHashUtils.encode(point.lat(), point.lon()); + } parseGeohashField(context, geohash); } if (enableLatLon) { @@ -635,6 +611,15 @@ public class GeoPointFieldMapper extends AbstractFieldMapper implement context.externalValue(point.lon()); lonMapper.parse(context); } + if (hasDocValues()) { + CustomGeoPointDocValuesField field = (CustomGeoPointDocValuesField) context.doc().getByKey(names().indexName()); + if (field == null) { + field = new CustomGeoPointDocValuesField(names().indexName(), point.lat(), point.lon()); + context.doc().addWithKey(names().indexName(), field); + } else { + field.add(point.lat(), point.lon()); + } + } } @Override @@ -716,4 +701,38 @@ public class GeoPointFieldMapper extends AbstractFieldMapper implement } } + public static class CustomGeoPointDocValuesField extends CustomNumericDocValuesField { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY); + TYPE.freeze(); + } + + private final ObjectOpenHashSet points; + + public CustomGeoPointDocValuesField(String name, double lat, double lon) { + super(name); + points = new ObjectOpenHashSet(2); + points.add(new GeoPoint(lat, lon)); + } + + public void add(double lat, double lon) { + points.add(new GeoPoint(lat, lon)); + } + + @Override + public BytesRef binaryValue() { + final byte[] bytes = new byte[points.size() * 16]; + int off = 0; + for (Iterator> it = points.iterator(); it.hasNext(); ) { + final GeoPoint point = it.next().value; + ByteUtils.writeDoubleLE(point.getLat(), bytes, off); + ByteUtils.writeDoubleLE(point.getLon(), bytes, off + 8); + off += 16; + } + return new BytesRef(bytes); + } + } + } diff --git a/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java b/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java index 05ac5314568..24c6dca11cd 100644 --- a/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java +++ b/src/test/java/org/elasticsearch/index/fielddata/DuelFieldDataTests.java @@ -369,19 +369,33 @@ public class DuelFieldDataTests extends AbstractFieldDataTests { } public void testDuelGeoPoints() throws Exception { + final String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("geopoint").field("type", "geo_point").startObject("fielddata").field("format", "doc_values").endObject().endObject() + .endObject().endObject().endObject().string(); + + final DocumentMapper mapper = MapperTestUtils.newParser().parse(mapping); + Random random = getRandom(); int atLeast = atLeast(random, 1000); - int maxValuesPerDoc = randomIntBetween(1, 3); + int maxValuesPerDoc = randomBoolean() ? 1 : randomIntBetween(2, 40); + // to test deduplication + double defaultLat = randomDouble() * 180 - 90; + double defaultLon = randomDouble() * 360 - 180; for (int i = 0; i < atLeast; i++) { - Document d = new Document(); - d.add(new StringField("_id", "" + i, Field.Store.NO)); final int numValues = randomInt(maxValuesPerDoc); + XContentBuilder doc = XContentFactory.jsonBuilder().startObject().startArray("geopoint"); for (int j = 0; j < numValues; ++j) { - final double lat = randomDouble() * 180 - 90; - final double lon = randomDouble() * 360 - 180; - d.add(new StringField("geopoint", lat + "," + lon, Field.Store.NO)); + if (randomBoolean()) { + doc.startObject().field("lat", defaultLat).field("lon", defaultLon).endObject(); + } else { + doc.startObject().field("lat", randomDouble() * 180 - 90).field("lon", randomDouble() * 360 - 180).endObject(); + } } - writer.addDocument(d); + doc = doc.endArray().endObject(); + final ParsedDocument d = mapper.parse("type", Integer.toString(i), doc.bytes()); + + writer.addDocument(d.rootDoc()); if (random.nextInt(10) == 0) { refreshReader(); } @@ -391,6 +405,7 @@ public class DuelFieldDataTests extends AbstractFieldDataTests { final Distance precision = new Distance(1, randomFrom(DistanceUnit.values())); typeMap.put(new FieldDataType("geo_point", ImmutableSettings.builder().put("format", "array")), Type.GeoPoint); typeMap.put(new FieldDataType("geo_point", ImmutableSettings.builder().put("format", "compressed").put("precision", precision)), Type.GeoPoint); + typeMap.put(new FieldDataType("geo_point", ImmutableSettings.builder().put("format", "doc_values")), Type.GeoPoint); ArrayList> list = new ArrayList>(typeMap.entrySet()); while (!list.isEmpty()) { diff --git a/src/test/java/org/elasticsearch/search/geo/GeoDistanceTests.java b/src/test/java/org/elasticsearch/search/geo/GeoDistanceTests.java index 59928bddf33..0f0bd902e63 100644 --- a/src/test/java/org/elasticsearch/search/geo/GeoDistanceTests.java +++ b/src/test/java/org/elasticsearch/search/geo/GeoDistanceTests.java @@ -52,11 +52,15 @@ import static org.hamcrest.Matchers.*; */ public class GeoDistanceTests extends ElasticsearchIntegrationTest { + private static String randomFieldDataFormat() { + return randomFrom(Arrays.asList("array", "compressed", "doc_values")); + } + @Test public void simpleDistanceTests() throws Exception { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties").startObject("location").field("type", "geo_point").field("lat_lon", true) - .startObject("fielddata").field("format", randomFrom(Arrays.asList("array", "compressed"))).endObject().endObject().endObject() + .startObject("fielddata").field("format", randomFieldDataFormat()).endObject().endObject().endObject() .endObject().endObject().string(); client().admin().indices().prepareCreate("test").addMapping("type1", mapping).execute().actionGet(); client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().execute().actionGet(); @@ -208,7 +212,7 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest { public void testDistanceSortingMVFields() throws Exception { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties").startObject("locations").field("type", "geo_point").field("lat_lon", true) - .startObject("fielddata").field("format", randomFrom(Arrays.asList("array", "compressed"))).endObject().endObject().endObject() + .startObject("fielddata").field("format", randomFieldDataFormat()).endObject().endObject().endObject() .endObject().endObject().string(); client().admin().indices().prepareCreate("test") @@ -340,7 +344,7 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest { public void testDistanceSortingWithMissingGeoPoint() throws Exception { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties").startObject("locations").field("type", "geo_point").field("lat_lon", true) - .startObject("fielddata").field("format", randomFrom(Arrays.asList("array", "compressed"))).endObject().endObject().endObject() + .startObject("fielddata").field("format", randomFieldDataFormat()).endObject().endObject().endObject() .endObject().endObject().string(); client().admin().indices().prepareCreate("test") @@ -442,7 +446,7 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest { .startObject("properties") .startObject("name").field("type", "string").endObject() .startObject("location").field("type", "geo_point").field("lat_lon", true) - .startObject("fielddata").field("format", randomFrom(Arrays.asList("array", "compressed"))).endObject().endObject() + .startObject("fielddata").field("format", randomFieldDataFormat()).endObject().endObject() .endObject() .endObject() .endObject() @@ -617,7 +621,7 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest { .field("geohash_precision", 24) .field("lat_lon", true) .startObject("fielddata") - .field("format", randomFrom(Arrays.asList("array", "compressed"))) + .field("format", randomFieldDataFormat()) .endObject() .endObject() .endObject()