Doc values for geo points.

This commit adds doc values support to geo points using the exact same approach
as for numeric data: geo points for a given document are stored uncompressed
and sequentially in a single binary doc values field.

Close #4207
This commit is contained in:
Adrien Grand 2013-12-26 14:52:45 +01:00
parent 9eb7441543
commit 05448b6276
7 changed files with 273 additions and 51 deletions

View File

@ -89,6 +89,9 @@ field data format.
`array` (default):: `array` (default)::
Stores latitudes and longitudes in arrays. Stores latitudes and longitudes in arrays.
`doc_values`::
Computes and stores field data data-structures on disk at indexing time.
[float] [float]
=== Fielddata loading === Fielddata loading

View File

@ -75,6 +75,7 @@ public class IndexFieldDataService extends AbstractIndexComponent {
.put("short", new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.SHORT)) .put("short", new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.SHORT))
.put("int", new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.INT)) .put("int", new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.INT))
.put("long", new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.LONG)) .put("long", new DocValuesIndexFieldData.Builder().numericType(IndexNumericFieldData.NumericType.LONG))
.put("geo_point", new GeoPointBinaryDVIndexFieldData.Builder())
.immutableMap(); .immutableMap();
buildersByTypeAndFormat = MapBuilder.<Tuple<String, String>, IndexFieldData.Builder>newMapBuilder() buildersByTypeAndFormat = MapBuilder.<Tuple<String, String>, IndexFieldData.Builder>newMapBuilder()
@ -108,6 +109,7 @@ public class IndexFieldDataService extends AbstractIndexComponent {
.put(Tuple.tuple("long", DISABLED_FORMAT), new DisabledIndexFieldData.Builder()) .put(Tuple.tuple("long", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
.put(Tuple.tuple("geo_point", ARRAY_FORMAT), new GeoPointDoubleArrayIndexFieldData.Builder()) .put(Tuple.tuple("geo_point", ARRAY_FORMAT), new GeoPointDoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("geo_point", DOC_VALUES_FORMAT), new GeoPointBinaryDVIndexFieldData.Builder())
.put(Tuple.tuple("geo_point", DISABLED_FORMAT), new DisabledIndexFieldData.Builder()) .put(Tuple.tuple("geo_point", DISABLED_FORMAT), new DisabledIndexFieldData.Builder())
.put(Tuple.tuple("geo_point", COMPRESSED_FORMAT), new GeoPointCompressedIndexFieldData.Builder()) .put(Tuple.tuple("geo_point", COMPRESSED_FORMAT), new GeoPointCompressedIndexFieldData.Builder())

View File

@ -0,0 +1,105 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.util.ByteUtils;
import org.elasticsearch.index.fielddata.AtomicGeoPointFieldData;
import org.elasticsearch.index.fielddata.GeoPointValues;
import org.elasticsearch.index.fielddata.ScriptDocValues;
/**
 * Per-segment geo point field data read from a Lucene {@link BinaryDocValues} field.
 * <p>
 * The binary value of a document is decoded as a sequence of 16-byte entries: the
 * latitude followed by the longitude, each read with {@code ByteUtils.readDoubleLE}.
 */
final class GeoPointBinaryDVAtomicFieldData extends AtomicGeoPointFieldData<ScriptDocValues> {

    private final AtomicReader reader;
    private final BinaryDocValues values;

    /**
     * @param reader the segment reader, used to report the number of documents
     * @param values the binary doc values of the field; {@code null} is accepted and
     *               replaced by {@link BinaryDocValues#EMPTY}
     */
    GeoPointBinaryDVAtomicFieldData(AtomicReader reader, BinaryDocValues values) {
        super();
        this.reader = reader;
        this.values = values == null ? BinaryDocValues.EMPTY : values;
    }

    @Override
    public boolean isMultiValued() {
        // A single binary value may encode several points (see setDocument below) and
        // there is no cheap way to know whether any document actually does, so this
        // must not claim that every document is single-valued.
        return true;
    }

    @Override
    public boolean isValuesOrdered() {
        return false;
    }

    @Override
    public int getNumDocs() {
        return reader.maxDoc();
    }

    @Override
    public long getNumberUniqueValues() {
        // the number of distinct points is unknown, report the upper bound
        return Long.MAX_VALUE;
    }

    @Override
    public long getMemorySizeInBytes() {
        return -1; // not exposed by Lucene
    }

    @Override
    public ScriptDocValues getScriptValues() {
        return new ScriptDocValues.GeoPoints(getGeoPointValues());
    }

    @Override
    public void close() {
        // no-op
    }

    /**
     * Returns a new iterator-style view over the points of each document. The returned
     * object keeps its own decoding state (a {@link BytesRef} and a {@link GeoPoint}
     * that is reset on every call to {@code nextValue()}).
     */
    @Override
    public GeoPointValues getGeoPointValues() {
        // NOTE(review): the constructor argument is presumably the multi-valued flag - confirm
        return new GeoPointValues(true) {

            final BytesRef bytes = new BytesRef();
            // index of the next double to read, counted in 8-byte slots from bytes.offset
            int i = Integer.MAX_VALUE;
            int valueCount = 0;
            final GeoPoint point = new GeoPoint();

            @Override
            public int setDocument(int docId) {
                values.get(docId, bytes);
                // every point takes exactly two doubles, i.e. 16 bytes
                assert bytes.length % 16 == 0;
                i = 0;
                return valueCount = (bytes.length >>> 4);
            }

            @Override
            public GeoPoint nextValue() {
                // two slots are consumed per point, hence the 2 * valueCount bound
                assert i < 2 * valueCount;
                final double lat = ByteUtils.readDoubleLE(bytes.bytes, bytes.offset + i++ * 8);
                final double lon = ByteUtils.readDoubleLE(bytes.bytes, bytes.offset + i++ * 8);
                return point.reset(lat, lon);
            }

        };
    }

}

View File

@ -0,0 +1,74 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.index.AtomicReaderContext;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.FieldMapper.Names;
import java.io.IOException;
public class GeoPointBinaryDVIndexFieldData extends DocValuesIndexFieldData implements IndexGeoPointFieldData<AtomicGeoPointFieldData<ScriptDocValues>> {
public GeoPointBinaryDVIndexFieldData(Index index, Names fieldNames) {
super(index, fieldNames);
}
@Override
public boolean valuesOrdered() {
return false;
}
@Override
public final XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
throw new ElasticSearchIllegalArgumentException("can't sort on geo_point field without using specific sorting feature, like geo_distance");
}
@Override
public AtomicGeoPointFieldData<ScriptDocValues> load(AtomicReaderContext context) {
try {
return new GeoPointBinaryDVAtomicFieldData(context.reader(), context.reader().getBinaryDocValues(fieldNames.indexName()));
} catch (IOException e) {
throw new ElasticSearchIllegalStateException("Cannot load doc values", e);
}
}
@Override
public AtomicGeoPointFieldData<ScriptDocValues> loadDirect(AtomicReaderContext context) throws Exception {
return load(context);
}
public static class Builder implements IndexFieldData.Builder {
@Override
public IndexFieldData<?> build(Index index, Settings indexSettings, FieldMapper<?> mapper, IndexFieldDataCache cache) {
final FieldMapper.Names fieldNames = mapper.names();
return new GeoPointBinaryDVIndexFieldData(index, fieldNames);
}
}
}

View File

@ -19,9 +19,13 @@
package org.elasticsearch.index.mapper.geo; package org.elasticsearch.index.mapper.geo;
import com.carrotsearch.hppc.ObjectOpenHashSet;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
@ -32,6 +36,7 @@ import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.geo.GeoUtils; import org.elasticsearch.common.geo.GeoUtils;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.DistanceUnit; import org.elasticsearch.common.unit.DistanceUnit;
import org.elasticsearch.common.util.ByteUtils;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.common.xcontent.support.XContentMapValues;
@ -40,14 +45,13 @@ import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider; import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.fielddata.FieldDataType; import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.*; import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.core.AbstractFieldMapper; import org.elasticsearch.index.mapper.core.*;
import org.elasticsearch.index.mapper.core.DoubleFieldMapper; import org.elasticsearch.index.mapper.core.NumberFieldMapper.CustomNumericDocValuesField;
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.index.mapper.object.ArrayValueMapperParser; import org.elasticsearch.index.mapper.object.ArrayValueMapperParser;
import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.index.similarity.SimilarityProvider;
import java.io.IOException; import java.io.IOException;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
@ -413,10 +417,6 @@ public class GeoPointFieldMapper extends AbstractFieldMapper<GeoPoint> implement
this.normalizeLat = normalizeLat; this.normalizeLat = normalizeLat;
this.normalizeLon = normalizeLon; this.normalizeLon = normalizeLon;
if (hasDocValues()) {
throw new ElasticSearchIllegalStateException("Geo points don't support doc values"); // yet
}
} }
@Override @Override
@ -571,42 +571,15 @@ public class GeoPointFieldMapper extends AbstractFieldMapper<GeoPoint> implement
} }
private void parseLatLon(ParseContext context, double lat, double lon) throws IOException { private void parseLatLon(ParseContext context, double lat, double lon) throws IOException {
if (normalizeLat || normalizeLon) { parse(context, new GeoPoint(lat, lon), null);
GeoPoint point = new GeoPoint(lat, lon);
GeoUtils.normalizePoint(point, normalizeLat, normalizeLon);
lat = point.lat();
lon = point.lon();
}
if (validateLat) {
if (lat > 90.0 || lat < -90.0) {
throw new ElasticSearchIllegalArgumentException("illegal latitude value [" + lat + "] for " + name());
}
}
if (validateLon) {
if (lon > 180.0 || lon < -180) {
throw new ElasticSearchIllegalArgumentException("illegal longitude value [" + lon + "] for " + name());
}
}
if (fieldType.indexed() || fieldType.stored()) {
Field field = new Field(names.indexName(), Double.toString(lat) + ',' + Double.toString(lon), fieldType);
context.doc().add(field);
}
if (enableGeoHash) {
parseGeohashField(context, GeoHashUtils.encode(lat, lon, geoHashPrecision));
}
if (enableLatLon) {
context.externalValue(lat);
latMapper.parse(context);
context.externalValue(lon);
lonMapper.parse(context);
}
} }
private void parseGeohash(ParseContext context, String geohash) throws IOException { private void parseGeohash(ParseContext context, String geohash) throws IOException {
GeoPoint point = GeoHashUtils.decode(geohash); GeoPoint point = GeoHashUtils.decode(geohash);
parse(context, point, geohash);
}
private void parse(ParseContext context, GeoPoint point, String geohash) throws IOException {
if (normalizeLat || normalizeLon) { if (normalizeLat || normalizeLon) {
GeoUtils.normalizePoint(point, normalizeLat, normalizeLon); GeoUtils.normalizePoint(point, normalizeLat, normalizeLon);
} }
@ -627,6 +600,9 @@ public class GeoPointFieldMapper extends AbstractFieldMapper<GeoPoint> implement
context.doc().add(field); context.doc().add(field);
} }
if (enableGeoHash) { if (enableGeoHash) {
if (geohash == null) {
geohash = GeoHashUtils.encode(point.lat(), point.lon());
}
parseGeohashField(context, geohash); parseGeohashField(context, geohash);
} }
if (enableLatLon) { if (enableLatLon) {
@ -635,6 +611,15 @@ public class GeoPointFieldMapper extends AbstractFieldMapper<GeoPoint> implement
context.externalValue(point.lon()); context.externalValue(point.lon());
lonMapper.parse(context); lonMapper.parse(context);
} }
if (hasDocValues()) {
CustomGeoPointDocValuesField field = (CustomGeoPointDocValuesField) context.doc().getByKey(names().indexName());
if (field == null) {
field = new CustomGeoPointDocValuesField(names().indexName(), point.lat(), point.lon());
context.doc().addWithKey(names().indexName(), field);
} else {
field.add(point.lat(), point.lon());
}
}
} }
@Override @Override
@ -716,4 +701,38 @@ public class GeoPointFieldMapper extends AbstractFieldMapper<GeoPoint> implement
} }
} }
/**
 * Binary doc values field that collects the geo points of one document and
 * serializes them as consecutive 16-byte entries (latitude then longitude, each
 * written with {@code ByteUtils.writeDoubleLE}).
 * <p>
 * Points are kept in a hash set, so points that compare equal are stored once
 * (this relies on {@code GeoPoint}'s equals/hashCode).
 */
public static class CustomGeoPointDocValuesField extends CustomNumericDocValuesField {

    public static final FieldType TYPE = new FieldType();
    static {
        TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY);
        TYPE.freeze();
    }

    private final ObjectOpenHashSet<GeoPoint> points;

    /** Creates the field for {@code name} holding a first point. */
    public CustomGeoPointDocValuesField(String name, double lat, double lon) {
        super(name);
        this.points = new ObjectOpenHashSet<GeoPoint>(2);
        this.points.add(new GeoPoint(lat, lon));
    }

    /** Records one more point for the same document. */
    public void add(double lat, double lon) {
        final GeoPoint extra = new GeoPoint(lat, lon);
        points.add(extra);
    }

    /** Encodes every collected point into a freshly allocated byte array, 16 bytes per point. */
    @Override
    public BytesRef binaryValue() {
        final byte[] encoded = new byte[points.size() * 16];
        int position = 0;
        for (ObjectCursor<GeoPoint> cursor : points) {
            final GeoPoint current = cursor.value;
            ByteUtils.writeDoubleLE(current.getLat(), encoded, position);
            ByteUtils.writeDoubleLE(current.getLon(), encoded, position + 8);
            position += 16;
        }
        return new BytesRef(encoded);
    }

}
} }

View File

@ -369,19 +369,33 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
} }
public void testDuelGeoPoints() throws Exception { public void testDuelGeoPoints() throws Exception {
final String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("geopoint").field("type", "geo_point").startObject("fielddata").field("format", "doc_values").endObject().endObject()
.endObject().endObject().endObject().string();
final DocumentMapper mapper = MapperTestUtils.newParser().parse(mapping);
Random random = getRandom(); Random random = getRandom();
int atLeast = atLeast(random, 1000); int atLeast = atLeast(random, 1000);
int maxValuesPerDoc = randomIntBetween(1, 3); int maxValuesPerDoc = randomBoolean() ? 1 : randomIntBetween(2, 40);
// to test deduplication
double defaultLat = randomDouble() * 180 - 90;
double defaultLon = randomDouble() * 360 - 180;
for (int i = 0; i < atLeast; i++) { for (int i = 0; i < atLeast; i++) {
Document d = new Document();
d.add(new StringField("_id", "" + i, Field.Store.NO));
final int numValues = randomInt(maxValuesPerDoc); final int numValues = randomInt(maxValuesPerDoc);
XContentBuilder doc = XContentFactory.jsonBuilder().startObject().startArray("geopoint");
for (int j = 0; j < numValues; ++j) { for (int j = 0; j < numValues; ++j) {
final double lat = randomDouble() * 180 - 90; if (randomBoolean()) {
final double lon = randomDouble() * 360 - 180; doc.startObject().field("lat", defaultLat).field("lon", defaultLon).endObject();
d.add(new StringField("geopoint", lat + "," + lon, Field.Store.NO)); } else {
doc.startObject().field("lat", randomDouble() * 180 - 90).field("lon", randomDouble() * 360 - 180).endObject();
}
} }
writer.addDocument(d); doc = doc.endArray().endObject();
final ParsedDocument d = mapper.parse("type", Integer.toString(i), doc.bytes());
writer.addDocument(d.rootDoc());
if (random.nextInt(10) == 0) { if (random.nextInt(10) == 0) {
refreshReader(); refreshReader();
} }
@ -391,6 +405,7 @@ public class DuelFieldDataTests extends AbstractFieldDataTests {
final Distance precision = new Distance(1, randomFrom(DistanceUnit.values())); final Distance precision = new Distance(1, randomFrom(DistanceUnit.values()));
typeMap.put(new FieldDataType("geo_point", ImmutableSettings.builder().put("format", "array")), Type.GeoPoint); typeMap.put(new FieldDataType("geo_point", ImmutableSettings.builder().put("format", "array")), Type.GeoPoint);
typeMap.put(new FieldDataType("geo_point", ImmutableSettings.builder().put("format", "compressed").put("precision", precision)), Type.GeoPoint); typeMap.put(new FieldDataType("geo_point", ImmutableSettings.builder().put("format", "compressed").put("precision", precision)), Type.GeoPoint);
typeMap.put(new FieldDataType("geo_point", ImmutableSettings.builder().put("format", "doc_values")), Type.GeoPoint);
ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<Entry<FieldDataType, Type>>(typeMap.entrySet()); ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<Entry<FieldDataType, Type>>(typeMap.entrySet());
while (!list.isEmpty()) { while (!list.isEmpty()) {

View File

@ -52,11 +52,15 @@ import static org.hamcrest.Matchers.*;
*/ */
public class GeoDistanceTests extends ElasticsearchIntegrationTest { public class GeoDistanceTests extends ElasticsearchIntegrationTest {
/** Picks at random one of the geo_point field data formats exercised by these tests. */
private static String randomFieldDataFormat() {
    return randomFrom(
            Arrays.asList(
                    "array",
                    "compressed",
                    "doc_values"));
}
@Test @Test
public void simpleDistanceTests() throws Exception { public void simpleDistanceTests() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties").startObject("location").field("type", "geo_point").field("lat_lon", true) .startObject("properties").startObject("location").field("type", "geo_point").field("lat_lon", true)
.startObject("fielddata").field("format", randomFrom(Arrays.asList("array", "compressed"))).endObject().endObject().endObject() .startObject("fielddata").field("format", randomFieldDataFormat()).endObject().endObject().endObject()
.endObject().endObject().string(); .endObject().endObject().string();
client().admin().indices().prepareCreate("test").addMapping("type1", mapping).execute().actionGet(); client().admin().indices().prepareCreate("test").addMapping("type1", mapping).execute().actionGet();
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().execute().actionGet(); client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().execute().actionGet();
@ -208,7 +212,7 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest {
public void testDistanceSortingMVFields() throws Exception { public void testDistanceSortingMVFields() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties").startObject("locations").field("type", "geo_point").field("lat_lon", true) .startObject("properties").startObject("locations").field("type", "geo_point").field("lat_lon", true)
.startObject("fielddata").field("format", randomFrom(Arrays.asList("array", "compressed"))).endObject().endObject().endObject() .startObject("fielddata").field("format", randomFieldDataFormat()).endObject().endObject().endObject()
.endObject().endObject().string(); .endObject().endObject().string();
client().admin().indices().prepareCreate("test") client().admin().indices().prepareCreate("test")
@ -340,7 +344,7 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest {
public void testDistanceSortingWithMissingGeoPoint() throws Exception { public void testDistanceSortingWithMissingGeoPoint() throws Exception {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties").startObject("locations").field("type", "geo_point").field("lat_lon", true) .startObject("properties").startObject("locations").field("type", "geo_point").field("lat_lon", true)
.startObject("fielddata").field("format", randomFrom(Arrays.asList("array", "compressed"))).endObject().endObject().endObject() .startObject("fielddata").field("format", randomFieldDataFormat()).endObject().endObject().endObject()
.endObject().endObject().string(); .endObject().endObject().string();
client().admin().indices().prepareCreate("test") client().admin().indices().prepareCreate("test")
@ -442,7 +446,7 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest {
.startObject("properties") .startObject("properties")
.startObject("name").field("type", "string").endObject() .startObject("name").field("type", "string").endObject()
.startObject("location").field("type", "geo_point").field("lat_lon", true) .startObject("location").field("type", "geo_point").field("lat_lon", true)
.startObject("fielddata").field("format", randomFrom(Arrays.asList("array", "compressed"))).endObject().endObject() .startObject("fielddata").field("format", randomFieldDataFormat()).endObject().endObject()
.endObject() .endObject()
.endObject() .endObject()
.endObject() .endObject()
@ -617,7 +621,7 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest {
.field("geohash_precision", 24) .field("geohash_precision", 24)
.field("lat_lon", true) .field("lat_lon", true)
.startObject("fielddata") .startObject("fielddata")
.field("format", randomFrom(Arrays.asList("array", "compressed"))) .field("format", randomFieldDataFormat())
.endObject() .endObject()
.endObject() .endObject()
.endObject() .endObject()