"CONTAINS" support for BKD-backed geo_shape and shape fields (#50141) (#50213)

Lucene 8.4 added support for the "CONTAINS" relation; this commit integrates
those changes into Elasticsearch. It also fixes a bug that occurred when
querying with a geometry collection using the "DISJOINT" relation.
This commit is contained in:
Ignacio Vera 2019-12-16 09:17:51 +01:00 committed by GitHub
parent c732d9923d
commit 3717c733ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 250 additions and 27 deletions

View File

@ -142,9 +142,8 @@ The following features are not yet supported with the new indexing approach:
using a `bool` query with each individual point.
* `CONTAINS` relation query - when using the new default vector indexing strategy, `geo_shape`
queries with `relation` defined as `contains` are not yet supported. If this query relation
is an absolute necessity, it is recommended to set `strategy` to `quadtree` and use the
deprecated PrefixTree strategy indexing approach.
queries with `relation` defined as `contains` are supported for indices created with
Elasticsearch 7.5.0 or higher.
[[prefix-trees]]
[float]

View File

@ -74,8 +74,8 @@ The following features are not yet supported:
over each individual point. For now, if this is absolutely needed, this can be achieved
using a `bool` query with each individual point. (Note: this could be very costly)
* `CONTAINS` relation query - `shape` queries with `relation` defined as `contains` are not
yet supported.
* `CONTAINS` relation query - `shape` queries with `relation` defined as `contains` are supported
for indices created with Elasticsearch 7.5.0 or higher.
[float]
===== Example
@ -445,4 +445,4 @@ POST /example/_doc
Due to the complex input structure and index representation of shapes,
it is not currently possible to sort shapes or retrieve their fields
directly. The `shape` value is only retrievable through the `_source`
field.
field.

View File

@ -151,8 +151,7 @@ has nothing in common with the query geometry.
* `WITHIN` - Return all documents whose `geo_shape` field
is within the query geometry.
* `CONTAINS` - Return all documents whose `geo_shape` field
contains the query geometry. Note: this is only supported using the
`recursive` Prefix Tree Strategy deprecated[6.6]
contains the query geometry.
[float]
==== Ignore Unmapped

View File

@ -171,12 +171,14 @@ GET /example/_search
The following is a complete list of spatial relation operators available:
* `INTERSECTS` - (default) Return all documents whose `geo_shape` field
* `INTERSECTS` - (default) Return all documents whose `shape` field
intersects the query geometry.
* `DISJOINT` - Return all documents whose `geo_shape` field
* `DISJOINT` - Return all documents whose `shape` field
has nothing in common with the query geometry.
* `WITHIN` - Return all documents whose `geo_shape` field
* `WITHIN` - Return all documents whose `shape` field
is within the query geometry.
* `CONTAINS` - Return all documents whose `shape` field
contains the query geometry.
[float]
==== Ignore Unmapped

View File

@ -69,6 +69,7 @@ public enum ShapeRelation implements Writeable {
case INTERSECTS: return QueryRelation.INTERSECTS;
case DISJOINT: return QueryRelation.DISJOINT;
case WITHIN: return QueryRelation.WITHIN;
case CONTAINS: return QueryRelation.CONTAINS;
default:
throw new IllegalArgumentException("ShapeRelation [" + this + "] not supported");
}

View File

@ -20,12 +20,14 @@
package org.elasticsearch.index.query;
import org.apache.lucene.document.LatLonShape;
import org.apache.lucene.document.ShapeField;
import org.apache.lucene.geo.Line;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.Version;
import org.elasticsearch.common.geo.GeoShapeType;
import org.elasticsearch.common.geo.ShapeRelation;
import org.elasticsearch.geometry.Circle;
@ -49,10 +51,10 @@ public class VectorGeoShapeQueryProcessor implements AbstractGeometryFieldMapper
@Override
public Query process(Geometry shape, String fieldName, ShapeRelation relation, QueryShardContext context) {
// CONTAINS queries are not yet supported by VECTOR strategy
if (relation == ShapeRelation.CONTAINS) {
// CONTAINS queries are not supported by VECTOR strategy for indices created before version 7.5.0 (Lucene 8.3.0)
if (relation == ShapeRelation.CONTAINS && context.indexVersionCreated().before(Version.V_7_5_0)) {
throw new QueryShardException(context,
ShapeRelation.CONTAINS + " query relation not supported for Field [" + fieldName + "]");
ShapeRelation.CONTAINS + " query relation not supported for Field [" + fieldName + "].");
}
// wrap geoQuery as a ConstantScoreQuery
return getVectorQueryFromShape(shape, fieldName, relation, context);
@ -95,12 +97,21 @@ public class VectorGeoShapeQueryProcessor implements AbstractGeometryFieldMapper
}
private void visit(BooleanQuery.Builder bqb, GeometryCollection<?> collection) {
BooleanClause.Occur occur;
if (relation == ShapeRelation.CONTAINS || relation == ShapeRelation.DISJOINT) {
// all shapes must be disjoint / must be contained in relation to the indexed shape.
occur = BooleanClause.Occur.MUST;
} else {
// at least one shape must intersect / contain the indexed shape.
occur = BooleanClause.Occur.SHOULD;
}
for (Geometry shape : collection) {
if (shape instanceof MultiPoint) {
// Flatten multipoints
// Flatten multi-points
// We do not support multi-point queries?
visit(bqb, (GeometryCollection<?>) shape);
} else {
bqb.add(shape.visit(this), BooleanClause.Occur.SHOULD);
bqb.add(shape.visit(this), occur);
}
}
}
@ -144,7 +155,13 @@ public class VectorGeoShapeQueryProcessor implements AbstractGeometryFieldMapper
@Override
public Query visit(Point point) {
validateIsGeoShapeFieldType();
return LatLonShape.newBoxQuery(fieldName, relation.getLuceneRelation(),
ShapeField.QueryRelation luceneRelation = relation.getLuceneRelation();
if (luceneRelation == ShapeField.QueryRelation.CONTAINS) {
// contains and intersects are equivalent but the implementation of
// intersects is more efficient.
luceneRelation = ShapeField.QueryRelation.INTERSECTS;
}
return LatLonShape.newBoxQuery(fieldName, luceneRelation,
point.getY(), point.getY(), point.getX(), point.getX());
}

View File

@ -483,10 +483,30 @@ public class GeoShapeQueryTests extends ESSingleNodeTestCase {
public void testContainsShapeQuery() throws Exception {
// Create a random geometry collection.
Rectangle mbr = xRandomRectangle(random(), xRandomPoint(random()), true);
GeometryCollectionBuilder gcb = createGeometryCollectionWithin(random(), mbr);
boolean usePrefixTrees = randomBoolean();
GeometryCollectionBuilder gcb;
if (usePrefixTrees) {
gcb = createGeometryCollectionWithin(random(), mbr);
} else {
// vector strategy does not yet support multipoint queries
gcb = new GeometryCollectionBuilder();
int numShapes = RandomNumbers.randomIntBetween(random(), 1, 4);
for (int i = 0; i < numShapes; ++i) {
ShapeBuilder shape;
do {
shape = RandomShapeGenerator.createShapeWithin(random(), mbr);
} while (shape instanceof MultiPointBuilder);
gcb.shape(shape);
}
}
client().admin().indices().prepareCreate("test").addMapping("type", "location", "type=geo_shape,tree=quadtree" )
.get();
if (usePrefixTrees) {
client().admin().indices().prepareCreate("test").addMapping("type", "location", "type=geo_shape,tree=quadtree")
.execute().actionGet();
} else {
client().admin().indices().prepareCreate("test").addMapping("type", "location", "type=geo_shape")
.execute().actionGet();
}
XContentBuilder docSource = gcb.toXContent(jsonBuilder().startObject().field("location"), null).endObject();
client().prepareIndex("test", "type", "1").setSource(docSource).setRefreshPolicy(IMMEDIATE).get();
@ -763,4 +783,77 @@ public class GeoShapeQueryTests extends ESSingleNodeTestCase {
assertNotEquals("1", response.getHits().getAt(0).getId());
assertNotEquals("1", response.getHits().getAt(1).getId());
}
/**
 * Verifies how a query geometry collection is evaluated for the CONTAINS, INTERSECTS and
 * DISJOINT relations against a {@code geo_shape} field.
 *
 * A single envelope spanning (-10, 10) to (10, -10) is indexed, then queried with two-point
 * collections that lie fully inside, partially inside and fully outside that envelope.
 *
 * @throws IOException if building the mapping or the document source fails
 */
public void testGeometryCollectionRelations() throws IOException {
// mapping with a single geo_shape field named "geo"
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject()
.startObject("doc")
.startObject("properties")
.startObject("geo").field("type", "geo_shape").endObject()
.endObject()
.endObject()
.endObject();
createIndex("test", Settings.builder().put("index.number_of_shards", 1).build(), "doc", mapping);
// the only indexed document: an envelope from (-10, 10) to (10, -10)
EnvelopeBuilder envelopeBuilder = new EnvelopeBuilder(new Coordinate(-10, 10), new Coordinate(10, -10));
client().index(new IndexRequest("test")
.source(jsonBuilder().startObject().field("geo", envelopeBuilder).endObject())
.setRefreshPolicy(IMMEDIATE)).actionGet();
{
// A geometry collection that is fully within the indexed shape
GeometryCollectionBuilder builder = new GeometryCollectionBuilder();
builder.shape(new PointBuilder(1, 2));
builder.shape(new PointBuilder(-2, -1));
// both points are inside the envelope, so the document contains the whole collection
SearchResponse response = client().prepareSearch("test")
.setQuery(geoShapeQuery("geo", builder.buildGeometry()).relation(ShapeRelation.CONTAINS))
.get();
assertEquals(1, response.getHits().getTotalHits().value);
response = client().prepareSearch("test")
.setQuery(geoShapeQuery("geo", builder.buildGeometry()).relation(ShapeRelation.INTERSECTS))
.get();
assertEquals(1, response.getHits().getTotalHits().value);
// the document shares points with the collection, so it must not match DISJOINT
response = client().prepareSearch("test")
.setQuery(geoShapeQuery("geo", builder.buildGeometry()).relation(ShapeRelation.DISJOINT))
.get();
assertEquals(0, response.getHits().getTotalHits().value);
}
{
// A geometry collection that is partially within the indexed shape
GeometryCollectionBuilder builder = new GeometryCollectionBuilder();
builder.shape(new PointBuilder(1, 2));
builder.shape(new PointBuilder(20, 30));
// (20, 30) is outside the envelope, so the document does not contain every member
SearchResponse response = client().prepareSearch("test")
.setQuery(geoShapeQuery("geo", builder.buildGeometry()).relation(ShapeRelation.CONTAINS))
.get();
assertEquals(0, response.getHits().getTotalHits().value);
// (1, 2) is inside the envelope, which is enough for INTERSECTS
response = client().prepareSearch("test")
.setQuery(geoShapeQuery("geo", builder.buildGeometry()).relation(ShapeRelation.INTERSECTS))
.get();
assertEquals(1, response.getHits().getTotalHits().value);
// a single member touching the document is enough to rule out DISJOINT
response = client().prepareSearch("test")
.setQuery(geoShapeQuery("geo", builder.buildGeometry()).relation(ShapeRelation.DISJOINT))
.get();
assertEquals(0, response.getHits().getTotalHits().value);
}
{
// A geometry collection that is disjoint with the indexed shape
GeometryCollectionBuilder builder = new GeometryCollectionBuilder();
builder.shape(new PointBuilder(-20, -30));
builder.shape(new PointBuilder(20, 30));
// neither point is inside the envelope
SearchResponse response = client().prepareSearch("test")
.setQuery(geoShapeQuery("geo", builder.buildGeometry()).relation(ShapeRelation.CONTAINS))
.get();
assertEquals(0, response.getHits().getTotalHits().value);
response = client().prepareSearch("test")
.setQuery(geoShapeQuery("geo", builder.buildGeometry()).relation(ShapeRelation.INTERSECTS))
.get();
assertEquals(0, response.getHits().getTotalHits().value);
// every member is disjoint from the document, so DISJOINT matches
response = client().prepareSearch("test")
.setQuery(geoShapeQuery("geo", builder.buildGeometry()).relation(ShapeRelation.DISJOINT))
.get();
assertEquals(1, response.getHits().getTotalHits().value);
}
}
}

View File

@ -5,6 +5,7 @@
*/
package org.elasticsearch.xpack.spatial.index.query;
import org.apache.lucene.document.ShapeField;
import org.apache.lucene.document.XYShape;
import org.apache.lucene.geo.XYLine;
import org.apache.lucene.geo.XYPolygon;
@ -13,6 +14,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.Version;
import org.elasticsearch.common.geo.GeoShapeType;
import org.elasticsearch.common.geo.ShapeRelation;
import org.elasticsearch.geometry.Circle;
@ -38,14 +40,14 @@ public class ShapeQueryProcessor implements AbstractGeometryFieldMapper.QueryPro
@Override
public Query process(Geometry shape, String fieldName, ShapeRelation relation, QueryShardContext context) {
// CONTAINS queries are not yet supported by VECTOR strategy
if (relation == ShapeRelation.CONTAINS) {
throw new QueryShardException(context,
ShapeRelation.CONTAINS + " query relation not supported for Field [" + fieldName + "]");
}
if (shape == null) {
return new MatchNoDocsQuery();
}
// CONTAINS queries are not supported by VECTOR strategy for indices created before version 7.5.0 (Lucene 8.3.0);
if (relation == ShapeRelation.CONTAINS && context.indexVersionCreated().before(Version.V_7_5_0)) {
throw new QueryShardException(context,
ShapeRelation.CONTAINS + " query relation not supported for Field [" + fieldName + "].");
}
// wrap geometry Query as a ConstantScoreQuery
return new ConstantScoreQuery(shape.visit(new ShapeVisitor(context, fieldName, relation)));
}
@ -76,12 +78,21 @@ public class ShapeQueryProcessor implements AbstractGeometryFieldMapper.QueryPro
}
private void visit(BooleanQuery.Builder bqb, GeometryCollection<?> collection) {
BooleanClause.Occur occur;
if (relation == ShapeRelation.CONTAINS || relation == ShapeRelation.DISJOINT) {
// all shapes must be disjoint / must be contained in relation to the indexed shape.
occur = BooleanClause.Occur.MUST;
} else {
// at least one shape must intersect / contain the indexed shape.
occur = BooleanClause.Occur.SHOULD;
}
for (Geometry shape : collection) {
if (shape instanceof MultiPoint) {
// Flatten multipoints
// We do not support multi-point queries?
visit(bqb, (GeometryCollection<?>) shape);
} else {
bqb.add(shape.visit(this), BooleanClause.Occur.SHOULD);
bqb.add(shape.visit(this), occur);
}
}
}
@ -128,7 +139,13 @@ public class ShapeQueryProcessor implements AbstractGeometryFieldMapper.QueryPro
@Override
public Query visit(Point point) {
return XYShape.newBoxQuery(fieldName, relation.getLuceneRelation(),
ShapeField.QueryRelation luceneRelation = relation.getLuceneRelation();
if (luceneRelation == ShapeField.QueryRelation.CONTAINS) {
// contains and intersects are equivalent but the implementation of
// intersects is more efficient.
luceneRelation = ShapeField.QueryRelation.INTERSECTS;
}
return XYShape.newBoxQuery(fieldName, luceneRelation,
(float)point.getX(), (float)point.getX(), (float)point.getY(), (float)point.getY());
}

View File

@ -6,10 +6,14 @@
package org.elasticsearch.xpack.spatial.search;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.geo.GeoJson;
import org.elasticsearch.common.geo.ShapeRelation;
import org.elasticsearch.common.geo.builders.EnvelopeBuilder;
import org.elasticsearch.common.geo.builders.GeometryCollectionBuilder;
import org.elasticsearch.common.geo.builders.PointBuilder;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
@ -25,6 +29,7 @@ import org.elasticsearch.xpack.spatial.index.query.ShapeQueryBuilder;
import org.elasticsearch.xpack.spatial.util.ShapeTestUtils;
import org.locationtech.jts.geom.Coordinate;
import java.io.IOException;
import java.util.Collection;
import java.util.Locale;
@ -239,4 +244,94 @@ public class ShapeQueryTests extends ESSingleNodeTestCase {
.get();
assertTrue(response.getHits().getTotalHits().value > 0);
}
/**
 * Verifies that a CONTAINS query on a {@code shape} field matches a document whose envelope
 * encloses the query envelope.
 */
public void testContainsShapeQuery() {
// index "test_contains" with a single shape field named "location"
client().admin().indices().prepareCreate("test_contains").addMapping("type", "location", "type=shape")
.execute().actionGet();
// indexed document: an envelope from (-100, 100) to (100, -100)
String doc = "{\"location\" : {\"type\":\"envelope\", \"coordinates\":[ [-100.0, 100.0], [100.0, -100.0]]}}";
client().prepareIndex("test_contains", "type").setId("1").setSource(doc, XContentType.JSON).setRefreshPolicy(IMMEDIATE).get();
// query with a smaller envelope, (-50, 50) to (50, -50), using the CONTAINS relation
EnvelopeBuilder queryShape = new EnvelopeBuilder(new Coordinate(-50, 50), new Coordinate(50, -50));
ShapeQueryBuilder queryBuilder = new ShapeQueryBuilder("location", queryShape.buildGeometry()).relation(ShapeRelation.CONTAINS);
SearchResponse response = client().prepareSearch("test_contains").setQuery(queryBuilder).get();
assertSearchResponse(response);
// the single indexed document is expected to match
assertThat(response.getHits().getTotalHits().value, equalTo(1L));
}
/**
 * Verifies how a query geometry collection is evaluated for the CONTAINS, INTERSECTS and
 * DISJOINT relations against a {@code shape} field.
 *
 * A single envelope spanning (-10, 10) to (10, -10) is indexed, then queried with two-point
 * collections that lie fully inside, partially inside and fully outside that envelope.
 *
 * @throws IOException if building the mapping or the document source fails
 */
public void testGeometryCollectionRelations() throws IOException {
// mapping with a single shape field named "geometry"
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject()
.startObject("doc")
.startObject("properties")
.startObject("geometry").field("type", "shape").endObject()
.endObject()
.endObject()
.endObject();
createIndex("test_collections", Settings.builder().put("index.number_of_shards", 1).build(), "doc", mapping);
// the only indexed document: an envelope from (-10, 10) to (10, -10)
EnvelopeBuilder envelopeBuilder = new EnvelopeBuilder(new Coordinate(-10, 10), new Coordinate(10, -10));
client().index(new IndexRequest("test_collections")
.source(jsonBuilder().startObject().field("geometry", envelopeBuilder).endObject())
.setRefreshPolicy(IMMEDIATE)).actionGet();
{
// A geometry collection that is fully within the indexed shape
GeometryCollectionBuilder builder = new GeometryCollectionBuilder();
builder.shape(new PointBuilder(1, 2));
builder.shape(new PointBuilder(-2, -1));
// both points are inside the envelope, so the document contains the whole collection
SearchResponse response = client().prepareSearch("test_collections")
.setQuery(new ShapeQueryBuilder("geometry", builder.buildGeometry()).relation(ShapeRelation.CONTAINS))
.get();
assertEquals(1, response.getHits().getTotalHits().value);
response = client().prepareSearch("test_collections")
.setQuery(new ShapeQueryBuilder("geometry", builder.buildGeometry()).relation(ShapeRelation.INTERSECTS))
.get();
assertEquals(1, response.getHits().getTotalHits().value);
// the document shares points with the collection, so it must not match DISJOINT
response = client().prepareSearch("test_collections")
.setQuery(new ShapeQueryBuilder("geometry", builder.buildGeometry()).relation(ShapeRelation.DISJOINT))
.get();
assertEquals(0, response.getHits().getTotalHits().value);
}
{
// A geometry collection that is partially within the indexed shape
GeometryCollectionBuilder builder = new GeometryCollectionBuilder();
builder.shape(new PointBuilder(1, 2));
builder.shape(new PointBuilder(20, 30));
// (20, 30) is outside the envelope, so the document does not contain every member
SearchResponse response = client().prepareSearch("test_collections")
.setQuery(new ShapeQueryBuilder("geometry", builder.buildGeometry()).relation(ShapeRelation.CONTAINS))
.get();
assertEquals(0, response.getHits().getTotalHits().value);
// (1, 2) is inside the envelope, which is enough for INTERSECTS
response = client().prepareSearch("test_collections")
.setQuery(new ShapeQueryBuilder("geometry", builder.buildGeometry()).relation(ShapeRelation.INTERSECTS))
.get();
assertEquals(1, response.getHits().getTotalHits().value);
// a single member touching the document is enough to rule out DISJOINT
response = client().prepareSearch("test_collections")
.setQuery(new ShapeQueryBuilder("geometry", builder.buildGeometry()).relation(ShapeRelation.DISJOINT))
.get();
assertEquals(0, response.getHits().getTotalHits().value);
}
{
// A geometry collection that is disjoint with the indexed shape
GeometryCollectionBuilder builder = new GeometryCollectionBuilder();
builder.shape(new PointBuilder(-20, -30));
builder.shape(new PointBuilder(20, 30));
// neither point is inside the envelope
SearchResponse response = client().prepareSearch("test_collections")
.setQuery(new ShapeQueryBuilder("geometry", builder.buildGeometry()).relation(ShapeRelation.CONTAINS))
.get();
assertEquals(0, response.getHits().getTotalHits().value);
response = client().prepareSearch("test_collections")
.setQuery(new ShapeQueryBuilder("geometry", builder.buildGeometry()).relation(ShapeRelation.INTERSECTS))
.get();
assertEquals(0, response.getHits().getTotalHits().value);
// every member is disjoint from the document, so DISJOINT matches
response = client().prepareSearch("test_collections")
.setQuery(new ShapeQueryBuilder("geometry", builder.buildGeometry()).relation(ShapeRelation.DISJOINT))
.get();
assertEquals(1, response.getHits().getTotalHits().value);
}
}
}