mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-27 10:28:28 +00:00
Implement stats for geo_point and geo_shape field (#22391)
Currently `geo_point` and `geo_shape` field are treated as `text` field by the field stats API and we try to extract the min/max values with MultiFields.getTerms. This is ok in master because a `geo_point` field is always a Point field but it can cause problem in 5.x (and 2.x) because the legacy `geo_point` are indexed as terms. As a result the min and max are extracted and then printed in the FieldStats output using BytesRef.utf8ToString which can throw an IndexOutOfBoundException since it's not valid UTF8 strings. This change ensure that we never try to extract min/max information from a `geo_point` field. It does not add a new type for geo points in the fieldstats API so we'll continue to use `text` for this kind of field. This PR is targeted to master even though we could only commit this change to 5.x. I think it's cleaner to have it in master too before we make any decision on https://github.com/elastic/elasticsearch/pull/21947. Fixes #22384
This commit is contained in:
parent
c6573e6e56
commit
360ce532eb
@ -19,14 +19,15 @@
|
||||
|
||||
package org.elasticsearch.index.mapper;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.elasticsearch.common.geo.GeoHashUtils;
|
||||
import org.elasticsearch.ElasticsearchParseException;
|
||||
import org.elasticsearch.action.fieldstats.FieldStats;
|
||||
import org.elasticsearch.common.Explicit;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.geo.GeoHashUtils;
|
||||
import org.elasticsearch.common.geo.GeoPoint;
|
||||
import org.elasticsearch.common.geo.GeoUtils;
|
||||
import org.elasticsearch.common.logging.DeprecationLogger;
|
||||
@ -170,6 +171,20 @@ public abstract class BaseGeoPointFieldMapper extends FieldMapper implements Arr
|
||||
public Query termQuery(Object value, QueryShardContext context) {
|
||||
throw new QueryShardException(context, "Geo fields do not support exact searching, use dedicated geo queries instead: [" + name() + "]");
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldStats stats(IndexReader reader) throws IOException {
|
||||
int maxDoc = reader.maxDoc();
|
||||
FieldInfo fi = org.apache.lucene.index.MultiFields.getMergedFieldInfos(reader).fieldInfo(name());
|
||||
if (fi == null) {
|
||||
return null;
|
||||
}
|
||||
/**
|
||||
* we don't have a specific type for geo_point so we use an empty {@link FieldStats.Text}.
|
||||
* TODO: we should maybe support a new type that knows how to (de)encode the min/max information
|
||||
*/
|
||||
return new FieldStats.Text(maxDoc, -1, -1, -1, isSearchable(), isAggregatable());
|
||||
}
|
||||
}
|
||||
|
||||
protected Explicit<Boolean> ignoreMalformed;
|
||||
|
@ -18,12 +18,11 @@
|
||||
*/
|
||||
package org.elasticsearch.index.mapper;
|
||||
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.locationtech.spatial4j.shape.Point;
|
||||
import org.locationtech.spatial4j.shape.Shape;
|
||||
import org.locationtech.spatial4j.shape.jts.JtsGeometry;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
|
||||
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
|
||||
@ -33,8 +32,8 @@ import org.apache.lucene.spatial.prefix.tree.PackedQuadPrefixTree;
|
||||
import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree;
|
||||
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.action.fieldstats.FieldStats;
|
||||
import org.elasticsearch.common.Explicit;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.geo.GeoUtils;
|
||||
import org.elasticsearch.common.geo.SpatialStrategy;
|
||||
import org.elasticsearch.common.geo.builders.ShapeBuilder;
|
||||
@ -45,6 +44,9 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.support.XContentMapValues;
|
||||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
import org.elasticsearch.index.query.QueryShardException;
|
||||
import org.locationtech.spatial4j.shape.Point;
|
||||
import org.locationtech.spatial4j.shape.Shape;
|
||||
import org.locationtech.spatial4j.shape.jts.JtsGeometry;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
@ -415,6 +417,20 @@ public class GeoShapeFieldMapper extends FieldMapper {
|
||||
public Query termQuery(Object value, QueryShardContext context) {
|
||||
throw new QueryShardException(context, "Geo fields do not support exact searching, use dedicated geo queries instead");
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldStats stats(IndexReader reader) throws IOException {
|
||||
int maxDoc = reader.maxDoc();
|
||||
FieldInfo fi = org.apache.lucene.index.MultiFields.getMergedFieldInfos(reader).fieldInfo(name());
|
||||
if (fi == null) {
|
||||
return null;
|
||||
}
|
||||
/**
|
||||
* we don't have a specific type for geo_shape so we use an empty {@link FieldStats.Text}.
|
||||
* TODO: we should maybe support a new type that knows how to (de)encode the min/max information
|
||||
*/
|
||||
return new FieldStats.Text(maxDoc, -1, -1, -1, isSearchable(), isAggregatable());
|
||||
}
|
||||
}
|
||||
|
||||
protected Explicit<Boolean> coerce;
|
||||
|
@ -12,6 +12,12 @@ setup:
|
||||
type: long
|
||||
bar:
|
||||
type: long
|
||||
geo:
|
||||
type: geo_point
|
||||
geo_shape:
|
||||
type: geo_shape
|
||||
tree: quadtree
|
||||
precision: 1m
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
@ -26,20 +32,26 @@ setup:
|
||||
type: long
|
||||
bar:
|
||||
type: text
|
||||
geo:
|
||||
type: geo_point
|
||||
geo_shape:
|
||||
type: geo_shape
|
||||
tree: quadtree
|
||||
precision: 1m
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test_1
|
||||
type: test
|
||||
id: id_1
|
||||
body: { foo: "bar", number: 123, bar: 123 }
|
||||
body: { foo: "bar", number: 123, bar: 123, geo: { lat: 48.858093, lon: 2.294694} }
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test_2
|
||||
type: test
|
||||
id: id_10
|
||||
body: { foo: "babar", number: 456, bar: "123" }
|
||||
body: { foo: "babar", number: 456, bar: "123", geo_shape: {type: "linestring", coordinates : [[-77.03653, 38.897676], [-77.009051, 38.889939]] } }
|
||||
|
||||
- do:
|
||||
indices.refresh: {}
|
||||
@ -48,7 +60,7 @@ setup:
|
||||
"Basic field stats":
|
||||
- do:
|
||||
field_stats:
|
||||
fields: [foo, number]
|
||||
fields: [foo, number, geo, geo_shape]
|
||||
|
||||
- match: { indices._all.fields.foo.max_doc: 2 }
|
||||
- match: { indices._all.fields.foo.doc_count: 2 }
|
||||
@ -68,13 +80,54 @@ setup:
|
||||
- match: { indices._all.fields.number.max_value: 456 }
|
||||
- match: { indices._all.fields.number.max_value_as_string: "456" }
|
||||
- match: { indices._all.fields.number.type: "integer" }
|
||||
- match: { indices._all.fields.geo.type: "string" }
|
||||
- match: { indices._all.fields.geo.max_doc: 1 }
|
||||
- match: { indices._all.fields.geo.doc_count: -1 }
|
||||
- match: { indices._all.fields.geo.searchable: true }
|
||||
- match: { indices._all.fields.geo.aggregatable: true }
|
||||
- match: { indices._all.fields.geo_shape.type: "string" }
|
||||
- match: { indices._all.fields.geo_shape.max_doc: 1 }
|
||||
- match: { indices._all.fields.geo_shape.searchable: true }
|
||||
- match: { indices._all.fields.geo_shape.aggregatable: false }
|
||||
|
||||
- is_false: conflicts
|
||||
|
||||
---
|
||||
"Geopoint field stats":
|
||||
- skip:
|
||||
version: " - 5.2.0"
|
||||
reason: geo_point fields don't return min/max for versions greater than 5.2.0
|
||||
|
||||
- do:
|
||||
field_stats:
|
||||
fields: [geo, geo_shape]
|
||||
|
||||
- match: { indices._all.fields.geo.type: "string" }
|
||||
- match: { indices._all.fields.geo.max_doc: 1 }
|
||||
- match: { indices._all.fields.geo.doc_count: -1 }
|
||||
- match: { indices._all.fields.geo.searchable: true }
|
||||
- match: { indices._all.fields.geo.aggregatable: true }
|
||||
- is_false: indices._all.fields.geo.min_value
|
||||
- is_false: indices._all.fields.geo.max_value
|
||||
- is_false: indices._all.fields.geo.min_value_as_string
|
||||
- is_false: indices._all.fields.geo.max_value_as_string
|
||||
- match: { indices._all.fields.geo_shape.type: "string" }
|
||||
- match: { indices._all.fields.geo_shape.max_doc: 1 }
|
||||
- match: { indices._all.fields.geo_shape.doc_count: -1 }
|
||||
- match: { indices._all.fields.geo_shape.searchable: true }
|
||||
- match: { indices._all.fields.geo_shape.aggregatable: false }
|
||||
- is_false: indices._all.fields.geo_shape.min_value
|
||||
- is_false: indices._all.fields.geo_shape.max_value
|
||||
- is_false: indices._all.fields.geo_shape.min_value_as_string
|
||||
- is_false: indices._all.fields.geo_shape.max_value_as_string
|
||||
- is_false: conflicts
|
||||
|
||||
|
||||
---
|
||||
"Basic field stats with level set to indices":
|
||||
- do:
|
||||
field_stats:
|
||||
fields: [foo, number]
|
||||
fields: [foo, number, geo, geo_shape]
|
||||
level: indices
|
||||
|
||||
- match: { indices.test_1.fields.foo.max_doc: 1 }
|
||||
@ -95,6 +148,10 @@ setup:
|
||||
- match: { indices.test_1.fields.number.max_value: 123 }
|
||||
- match: { indices.test_1.fields.number.max_value_as_string: "123" }
|
||||
- match: { indices.test_1.fields.number.type: "integer" }
|
||||
- match: { indices.test_1.fields.geo.type: "string" }
|
||||
- match: { indices.test_1.fields.geo.max_doc: 1 }
|
||||
- match: { indices.test_1.fields.geo.searchable: true }
|
||||
- match: { indices.test_1.fields.geo.aggregatable: true }
|
||||
- match: { indices.test_2.fields.foo.max_doc: 1 }
|
||||
- match: { indices.test_2.fields.foo.doc_count: 1 }
|
||||
- match: { indices.test_2.fields.foo.min_value: "babar" }
|
||||
@ -114,6 +171,45 @@ setup:
|
||||
- match: { indices.test_2.fields.number.max_value: 456 }
|
||||
- match: { indices.test_2.fields.number.max_value_as_string: "456" }
|
||||
- match: { indices.test_2.fields.number.type: "integer" }
|
||||
- match: { indices.test_2.fields.geo_shape.type: "string" }
|
||||
- match: { indices.test_2.fields.geo_shape.max_doc: 1 }
|
||||
- match: { indices.test_2.fields.geo_shape.searchable: true }
|
||||
- match: { indices.test_2.fields.geo_shape.aggregatable: false }
|
||||
- is_false: indices.test_2.fields.geo
|
||||
- is_false: conflicts
|
||||
|
||||
|
||||
---
|
||||
"Geopoint field stats with level set to indices":
|
||||
- skip:
|
||||
version: " - 5.2.0"
|
||||
reason: geo_point fields don't return min/max for versions greater than 5.2.0
|
||||
|
||||
- do:
|
||||
field_stats:
|
||||
fields: [geo, geo_shape]
|
||||
level: indices
|
||||
|
||||
- match: { indices.test_1.fields.geo.max_doc: 1 }
|
||||
- match: { indices.test_1.fields.geo.doc_count: -1 }
|
||||
- is_false: indices.test_1.fields.geo.min_value
|
||||
- is_false: indices.test_1.fields.geo.max_value
|
||||
- is_false: indices.test_1.fields.geo.min_value_as_string
|
||||
- is_false: indices.test_1.fields.geo.max_value_as_string
|
||||
- match: { indices.test_1.fields.geo.searchable: true }
|
||||
- match: { indices.test_1.fields.geo.aggregatable: true }
|
||||
- match: { indices.test_1.fields.geo.type: "string" }
|
||||
- is_false: indices.test_2.fields.geo
|
||||
- match: { indices.test_2.fields.geo_shape.max_doc: 1 }
|
||||
- match: { indices.test_2.fields.geo_shape.doc_count: -1 }
|
||||
- is_false: indices.test_2.fields.geo_shape.min_value
|
||||
- is_false: indices.test_2.fields.geo_shape.max_value
|
||||
- is_false: indices.test_2.fields.geo_shape.min_value_as_string
|
||||
- is_false: indices.test_2.fields.geo_shape.max_value_as_string
|
||||
- match: { indices.test_2.fields.geo_shape.searchable: true }
|
||||
- match: { indices.test_2.fields.geo_shape.aggregatable: false }
|
||||
- match: { indices.test_2.fields.geo_shape.type: "string" }
|
||||
- is_false: indices.test_2.fields.geo
|
||||
- is_false: conflicts
|
||||
|
||||
---
|
||||
|
Loading…
x
Reference in New Issue
Block a user