Add geo_point to FieldStats

This commit adds a new GeoPoint class to FieldStats for computing field stats over geo_point field types.
This commit is contained in:
Nicholas Knize 2016-12-02 15:32:02 -06:00
parent 1fe74a6b4b
commit 84e4f91253
5 changed files with 181 additions and 33 deletions

View File

@ -124,8 +124,10 @@ public abstract class FieldStats<T> implements Writeable, ToXContent {
return "string";
case 4:
return "ip";
case 5:
return "geo_point";
default:
throw new IllegalArgumentException("Unknown type.");
throw new IllegalArgumentException("Unknown type 1. " + type);
}
}
@ -276,7 +278,7 @@ public abstract class FieldStats<T> implements Writeable, ToXContent {
}
}
private void updateMinMax(T min, T max) {
protected void updateMinMax(T min, T max) {
if (compare(minValue, min) > 0) {
minValue = min;
}
@ -643,6 +645,55 @@ public abstract class FieldStats<T> implements Writeable, ToXContent {
}
}
/**
 * Field statistics for {@code geo_point} fields, registered under type id {@code 5}.
 * <p>
 * The min and max points are tracked per component: latitude and longitude are merged
 * independently of each other (see {@link #updateMinMax}). Points therefore have no ordering here
 * and {@link #compare} always throws.
 */
public static class GeoPoint extends FieldStats<org.elasticsearch.common.geo.GeoPoint> {

    /**
     * Creates geo_point stats that are constructed without min/max points.
     */
    public GeoPoint(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
                    boolean isSearchable, boolean isAggregatable) {
        super((byte) 5, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, isSearchable, isAggregatable);
    }

    /**
     * Creates geo_point stats carrying the given min and max points.
     *
     * @param minValue point passed to the superclass as the minimum
     * @param maxValue point passed to the superclass as the maximum
     */
    public GeoPoint(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq,
                    boolean isSearchable, boolean isAggregatable,
                    org.elasticsearch.common.geo.GeoPoint minValue,
                    org.elasticsearch.common.geo.GeoPoint maxValue) {
        super((byte) 5, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, isSearchable, isAggregatable,
            minValue, maxValue);
    }

    /**
     * Parses a point via {@code GeoPoint.parseFromLatLon}; the {@code fmt} argument is not used.
     */
    @Override
    public org.elasticsearch.common.geo.GeoPoint valueOf(String value, String fmt) {
        return org.elasticsearch.common.geo.GeoPoint.parseFromLatLon(value);
    }

    /**
     * Merges another min/max pair into this one, component by component. The points held by
     * this instance are reset in place rather than replaced.
     */
    @Override
    protected void updateMinMax(org.elasticsearch.common.geo.GeoPoint min, org.elasticsearch.common.geo.GeoPoint max) {
        // lower bound: smallest latitude and smallest longitude seen so far
        final double lowLat = Math.min(min.lat(), minValue.lat());
        final double lowLon = Math.min(min.lon(), minValue.lon());
        minValue.reset(lowLat, lowLon);

        // upper bound: largest latitude and largest longitude seen so far
        final double highLat = Math.max(max.lat(), maxValue.lat());
        final double highLon = Math.max(max.lon(), maxValue.lon());
        maxValue.reset(highLat, highLon);
    }

    /**
     * Not supported for points.
     *
     * @throws IllegalArgumentException always
     */
    @Override
    public int compare(org.elasticsearch.common.geo.GeoPoint p1, org.elasticsearch.common.geo.GeoPoint p2) {
        throw new IllegalArgumentException("compare is not supported for geo_point field stats");
    }

    /**
     * Writes four doubles in this order: min lat, min lon, max lat, max lon.
     */
    @Override
    public void writeMinMax(StreamOutput out) throws IOException {
        final org.elasticsearch.common.geo.GeoPoint lower = minValue;
        out.writeDouble(lower.lat());
        out.writeDouble(lower.lon());
        final org.elasticsearch.common.geo.GeoPoint upper = maxValue;
        out.writeDouble(upper.lat());
        out.writeDouble(upper.lon());
    }

    /** Returns the {@code toString()} form of the min point. */
    @Override
    public String getMinValueAsString() {
        return minValue.toString();
    }

    /** Returns the {@code toString()} form of the max point. */
    @Override
    public String getMaxValueAsString() {
        return maxValue.toString();
    }
}
public static FieldStats readFrom(StreamInput in) throws IOException {
byte type = in.readByte();
long maxDoc = in.readLong();
@ -690,7 +741,7 @@ public abstract class FieldStats<T> implements Writeable, ToXContent {
isSearchable, isAggregatable);
}
case 4:
case 4: {
if (hasMinMax == false) {
return new Ip(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable);
@ -705,9 +756,19 @@ public abstract class FieldStats<T> implements Writeable, ToXContent {
InetAddress max = InetAddressPoint.decode(b2);
return new Ip(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable, min, max);
}
case 5: {
if (hasMinMax == false) {
return new GeoPoint(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable);
}
org.elasticsearch.common.geo.GeoPoint min = new org.elasticsearch.common.geo.GeoPoint(in.readDouble(), in.readDouble());
org.elasticsearch.common.geo.GeoPoint max = new org.elasticsearch.common.geo.GeoPoint(in.readDouble(), in.readDouble());
return new GeoPoint(maxDoc, docCount, sumDocFreq, sumTotalTermFreq,
isSearchable, isAggregatable, min, max);
}
default:
throw new IllegalArgumentException("Unknown type.");
throw new IllegalArgumentException("Unknown type 2. " + type);
}
}

View File

@ -22,7 +22,9 @@ package org.elasticsearch.index.mapper;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.action.fieldstats.FieldStats;
import org.elasticsearch.common.Explicit;
@ -173,17 +175,21 @@ public abstract class BaseGeoPointFieldMapper extends FieldMapper implements Arr
}
@Override
public FieldStats stats(IndexReader reader) throws IOException {
int maxDoc = reader.maxDoc();
FieldInfo fi = org.apache.lucene.index.MultiFields.getMergedFieldInfos(reader).fieldInfo(name());
public FieldStats.GeoPoint stats(IndexReader reader) throws IOException {
String field = name();
FieldInfo fi = org.apache.lucene.index.MultiFields.getMergedFieldInfos(reader).fieldInfo(field);
if (fi == null) {
return null;
}
/**
* we don't have a specific type for geo_point so we use an empty {@link FieldStats.Text}.
* TODO: we should maybe support a new type that knows how to (de)encode the min/max information
*/
return new FieldStats.Text(maxDoc, -1, -1, -1, isSearchable(), isAggregatable());
Terms terms = org.apache.lucene.index.MultiFields.getTerms(reader, field);
if (terms == null) {
return new FieldStats.GeoPoint(reader.maxDoc(), 0L, -1L, -1L, isSearchable(), isAggregatable());
}
GeoPoint minPt = GeoPoint.fromGeohash(NumericUtils.sortableBytesToLong(terms.getMin().bytes, terms.getMin().offset));
GeoPoint maxPt = GeoPoint.fromGeohash(NumericUtils.sortableBytesToLong(terms.getMax().bytes, terms.getMax().offset));
return new FieldStats.GeoPoint(reader.maxDoc(), terms.getDocCount(), -1L, terms.getSumTotalTermFreq(), isSearchable(),
isAggregatable(), minPt, maxPt);
}
}

View File

@ -21,8 +21,13 @@ package org.elasticsearch.index.mapper;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.Query;
import org.elasticsearch.action.fieldstats.FieldStats;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.geo.GeoUtils;
@ -120,6 +125,26 @@ public class LatLonPointFieldMapper extends BaseGeoPointFieldMapper {
throw new QueryShardException(context, "Geo fields do not support exact searching, use dedicated geo queries instead: ["
+ name() + "]");
}
/**
 * Builds geo_point field stats from the point values of {@code reader}.
 * <p>
 * Returns {@code null} when the merged field infos contain no entry for this field. When the
 * field exists but holds no points, the stats are created without min/max and with a doc count
 * of {@code -1}. Otherwise the min and max packed values are decoded into points, reading the
 * latitude at offset {@code 0} and the longitude at offset {@code Integer.BYTES}.
 */
@Override
public FieldStats.GeoPoint stats(IndexReader reader) throws IOException {
    final String fieldName = name();
    if (org.apache.lucene.index.MultiFields.getMergedFieldInfos(reader).fieldInfo(fieldName) == null) {
        return null;
    }

    final long pointCount = PointValues.size(reader, fieldName);
    if (pointCount == 0) {
        return new FieldStats.GeoPoint(reader.maxDoc(), -1L, -1L, -1L, isSearchable(), isAggregatable());
    }

    final int docsWithPoints = PointValues.getDocCount(reader, fieldName);
    final byte[] packedMin = PointValues.getMinPackedValue(reader, fieldName);
    final byte[] packedMax = PointValues.getMaxPackedValue(reader, fieldName);

    // each packed value holds the encoded latitude first, followed by the encoded longitude
    final GeoPoint lower = new GeoPoint(
        GeoEncodingUtils.decodeLatitude(packedMin, 0),
        GeoEncodingUtils.decodeLongitude(packedMin, Integer.BYTES));
    final GeoPoint upper = new GeoPoint(
        GeoEncodingUtils.decodeLatitude(packedMax, 0),
        GeoEncodingUtils.decodeLongitude(packedMax, Integer.BYTES));

    return new FieldStats.GeoPoint(reader.maxDoc(), docsWithPoints, -1L, pointCount,
        isSearchable(), isAggregatable(), lower, upper);
}
}
@Override

View File

@ -27,8 +27,10 @@ import org.elasticsearch.action.fieldstats.FieldStatsAction;
import org.elasticsearch.action.fieldstats.FieldStatsResponse;
import org.elasticsearch.action.fieldstats.IndexConstraint;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.index.cache.request.RequestCacheStats;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.geo.RandomGeoGenerator;
import java.util.ArrayList;
import java.util.List;
@ -40,6 +42,7 @@ import static org.elasticsearch.action.fieldstats.IndexConstraint.Property.MAX;
import static org.elasticsearch.action.fieldstats.IndexConstraint.Property.MIN;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAllSuccessful;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.nullValue;
@ -60,7 +63,8 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
"long", "type=long",
"integer", "type=integer",
"short", "type=short",
"byte", "type=byte"));
"byte", "type=byte",
"location", "type=geo_point"));
ensureGreen("test");
// index=false
@ -74,7 +78,8 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
"long", "type=long,index=false",
"integer", "type=integer,index=false",
"short", "type=short,index=false",
"byte", "type=byte,index=false"
"byte", "type=byte,index=false",
"location", "type=geo_point,index=false"
));
ensureGreen("test1");
@ -89,7 +94,8 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
"long", "type=long,index=false",
"integer", "type=integer,index=false",
"short", "type=short,index=false",
"byte", "type=byte,index=false"
"byte", "type=byte,index=false",
"location", "type=geo_point,index=false"
));
ensureGreen("test3");
@ -107,6 +113,8 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
double maxFloat = Double.NEGATIVE_INFINITY;
double minDouble = Double.POSITIVE_INFINITY;
double maxDouble = Double.NEGATIVE_INFINITY;
GeoPoint minLoc = new GeoPoint(90, 180);
GeoPoint maxLoc = new GeoPoint(-90, -180);
String minString = new String(Character.toChars(1114111));
String maxString = "0";
@ -135,6 +143,9 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
double d = randomDouble();
minDouble = Math.min(minDouble, d);
maxDouble = Math.max(maxDouble, d);
GeoPoint loc = RandomGeoGenerator.randomPoint(random());
minLoc.reset(Math.min(loc.lat(), minLoc.lat()), Math.min(loc.lon(), minLoc.lon()));
maxLoc.reset(Math.max(loc.lat(), maxLoc.lat()), Math.max(loc.lon(), maxLoc.lon()));
String str = randomRealisticUnicodeOfLength(3);
if (str.compareTo(minString) < 0) {
minString = str;
@ -151,6 +162,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
"half_float", hf,
"float", f,
"double", d,
"location", loc,
"string", str)
);
}
@ -158,7 +170,7 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
FieldStatsResponse response = client()
.prepareFieldStats()
.setFields("byte", "short", "integer", "long", "half_float", "float", "double", "string").get();
.setFields("byte", "short", "integer", "long", "half_float", "float", "double", "location", "string").get();
assertAllSuccessful(response);
for (FieldStats<?> stats : response.getAllFieldStats().values()) {
@ -188,6 +200,11 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
assertThat(response.getAllFieldStats().get("double").getMinValue(), equalTo(minDouble));
assertThat(response.getAllFieldStats().get("double").getMaxValue(), equalTo(maxDouble));
assertThat(response.getAllFieldStats().get("double").getDisplayType(), equalTo("float"));
assertThat(((GeoPoint)response.getAllFieldStats().get("location").getMinValue()).lat(), closeTo(minLoc.lat(), 1E-5));
assertThat(((GeoPoint)response.getAllFieldStats().get("location").getMinValue()).lon(), closeTo(minLoc.lon(), 1E-5));
assertThat(((GeoPoint)response.getAllFieldStats().get("location").getMaxValue()).lat(), closeTo(maxLoc.lat(), 1E-5));
assertThat(((GeoPoint)response.getAllFieldStats().get("location").getMaxValue()).lon(), closeTo(maxLoc.lon(), 1E-5));
assertThat(response.getAllFieldStats().get("location").getDisplayType(), equalTo("geo_point"));
}
public void testFieldStatsIndexLevel() throws Exception {
@ -522,6 +539,25 @@ public class FieldStatsIntegrationIT extends ESIntegTestCase {
assertEquals(oldHitCount, indexStats.getHitCount());
}
/**
 * Checks the field stats reported for a geo_point field that is mapped as not indexed: no
 * min/max values are returned, and the field is flagged aggregatable but not searchable.
 */
public void testGeoPointNotIndexed() throws Exception {
    // "index=false" is the boolean spelling used by the other geo_point mappings in this class
    assertAcked(prepareCreate("test").addMapping("test", "value", "type=long", "location", "type=geo_point,index=false"));
    ensureGreen("test");

    // only the first document carries a location
    client().prepareIndex("test", "test").setSource("value", 1L, "location", new GeoPoint(32, -132)).get();
    client().prepareIndex("test", "test").setSource("value", 2L).get();
    client().prepareIndex("test", "test").setSource("value", 3L).get();
    client().prepareIndex("test", "test").setSource("value", 4L).get();
    refresh();

    FieldStatsResponse response = client().prepareFieldStats().setFields("value", "location").get();
    assertAllSuccessful(response);
    assertThat(response.getIndicesMergedFieldStats().size(), equalTo(1));

    FieldStats<?> locationStats = response.getAllFieldStats().get("location");
    assertThat(locationStats.getMinValue(), nullValue());
    assertThat(locationStats.getMaxValue(), nullValue());
    assertThat(locationStats.isAggregatable(), equalTo(true));
    assertThat(locationStats.isSearchable(), equalTo(false));
}
private void indexRange(String index, long from, long to) throws Exception {
indexRange(index, "value", from, to);
}

View File

@ -34,6 +34,7 @@ setup:
type: text
geo:
type: geo_point
index: no
geo_shape:
type: geo_shape
tree: quadtree
@ -51,7 +52,7 @@ setup:
index: test_2
type: test
id: id_10
body: { foo: "babar", number: 456, bar: "123", geo_shape: {type: "linestring", coordinates : [[-77.03653, 38.897676], [-77.009051, 38.889939]] } }
body: { foo: "babar", number: 456, bar: "123", geo: { lat: 48.858093, lon: 2.294694}, geo_shape: {type: "linestring", coordinates : [[-77.03653, 38.897676], [-77.009051, 38.889939]] } }
- do:
indices.refresh: {}
@ -84,17 +85,17 @@ setup:
- is_false: conflicts
---
"Geopoint field stats":
"Geo field stats":
- skip:
version: " - 5.2.0"
version: " - 5.3.0"
reason: geo_point fields don't return min/max for versions greater than 5.2.0
- do:
field_stats:
fields: [geo, geo_shape]
- match: { indices._all.fields.geo.type: "string" }
- match: { indices._all.fields.geo.max_doc: 1 }
- match: { indices._all.fields.geo.type: "geo_point" }
- match: { indices._all.fields.geo.max_doc: 2 }
- match: { indices._all.fields.geo.doc_count: -1 }
- match: { indices._all.fields.geo.searchable: true }
- match: { indices._all.fields.geo.aggregatable: true }
@ -113,7 +114,6 @@ setup:
- is_false: indices._all.fields.geo_shape.max_value_as_string
- is_false: conflicts
---
"Basic field stats with level set to indices":
- do:
@ -162,9 +162,9 @@ setup:
---
"Geopoint field stats with level set to indices":
"Geo field stats with level set to indices":
- skip:
version: " - 5.2.0"
version: " - 5.3.0"
reason: geo_point fields don't return min/max for versions greater than 5.2.0
- do:
@ -173,15 +173,15 @@ setup:
level: indices
- match: { indices.test_1.fields.geo.max_doc: 1 }
- match: { indices.test_1.fields.geo.doc_count: -1 }
- is_false: indices.test_1.fields.geo.min_value
- is_false: indices.test_1.fields.geo.max_value
- is_false: indices.test_1.fields.geo.min_value_as_string
- is_false: indices.test_1.fields.geo.max_value_as_string
- match: { indices.test_1.fields.geo.doc_count: 1 }
- is_true: indices.test_1.fields.geo.min_value
- is_true: indices.test_1.fields.geo.max_value
- is_true: indices.test_1.fields.geo.min_value_as_string
- is_true: indices.test_1.fields.geo.max_value_as_string
- match: { indices.test_1.fields.geo.searchable: true }
- match: { indices.test_1.fields.geo.aggregatable: true }
- match: { indices.test_1.fields.geo.type: "string" }
- is_false: indices.test_2.fields.geo
- match: { indices.test_1.fields.geo.type: "geo_point" }
- is_true: indices.test_2.fields.geo
- match: { indices.test_2.fields.geo_shape.max_doc: 1 }
- match: { indices.test_2.fields.geo_shape.doc_count: -1 }
- is_false: indices.test_2.fields.geo_shape.min_value
@ -191,7 +191,27 @@ setup:
- match: { indices.test_2.fields.geo_shape.searchable: true }
- match: { indices.test_2.fields.geo_shape.aggregatable: false }
- match: { indices.test_2.fields.geo_shape.type: "string" }
- is_false: indices.test_2.fields.geo
- is_false: conflicts
---
"Geopoint field stats":
- skip:
version: " - 5.3.0"
reason: geo_point type not handled for versions earlier than 6.0.0
- do:
field_stats:
fields: [geo]
level: indices
- match: { indices.test_2.fields.geo.max_doc: 1 }
- match: { indices.test_2.fields.geo.doc_count: -1 }
- is_false: indices.test_2.fields.geo.min_value
- is_false: indices.test_2.fields.geo.max_value
- match: { indices.test_2.fields.geo.searchable: false }
- match: { indices.test_2.fields.geo.aggregatable: true }
- match: { indices.test_2.fields.geo.type: "geo_point" }
- is_true: indices.test_2.fields.geo
- is_false: conflicts
---