LUCENE-8960: Add LatLonDocValuesPointInPolygonQuery (#851)

This commit is contained in:
Ignacio Vera 2019-09-03 09:45:58 +02:00 committed by iverase
parent dd27d003a4
commit 54685c5e7f
5 changed files with 251 additions and 60 deletions

View File

@ -28,6 +28,8 @@ New Features
* LUCENE-8769: Introduce Range Query For Multiple Connected Ranges (Atri Sharma)
* LUCENE-8960: Introduce LatLonDocValuesPointInPolygonQuery for LatLonDocValuesField (Ignacio Vera)
Improvements
* LUCENE-8874: Show SPI names instead of class names in Luke Analysis tab. (Tomoko Uchida)

View File

@ -21,6 +21,7 @@ import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.search.FieldDoc;
@ -178,4 +179,19 @@ public class LatLonDocValuesField extends Field {
public static Query newSlowDistanceQuery(String field, double latitude, double longitude, double radiusMeters) {
return new LatLonDocValuesDistanceQuery(field, latitude, longitude, radiusMeters);
}
/**
 * Create a query matching documents that have at least one point inside the supplied polygons.
 * <p>
 * This query is usually slow: it does not use an index structure and has to verify
 * documents one-by-one to know whether they match. It is best used wrapped in an
 * {@link IndexOrDocValuesQuery} alongside a
 * {@link LatLonPoint#newPolygonQuery(String, Polygon...)}.
 *
 * @param field field name. must not be null.
 * @param polygons array of polygons. must not be null or empty, and must not contain null elements.
 * @return query matching points within the given polygons.
 * @throws IllegalArgumentException if {@code field} is null, or if {@code polygons} is null,
 *         is empty, or contains a null polygon.
 */
public static Query newSlowPolygonQuery(String field, Polygon... polygons) {
  // All argument validation is performed by the query's constructor.
  final Query polygonQuery = new LatLonDocValuesPointInPolygonQuery(field, polygons);
  return polygonQuery;
}
}

View File

@ -0,0 +1,146 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.geo.Polygon2D;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
/**
 * Polygon query for {@link LatLonDocValuesField}.
 * <p>
 * The query reads the sorted-numeric doc values of the field and tests each value of a
 * candidate document against the polygons; a document matches as soon as one of its
 * values is accepted.
 */
public class LatLonDocValuesPointInPolygonQuery extends Query {

  /** Name of the doc-values field holding the encoded points. */
  private final String field;

  /** Private copy of the polygons to match against; never null, never empty, no null elements. */
  private final Polygon[] polygons;

  /**
   * Creates the query.
   *
   * @param field field name. must not be null.
   * @param polygons polygons to match. must not be null or empty, and must not contain null elements.
   * @throws IllegalArgumentException if {@code field} is null, or if {@code polygons} is null,
   *         is empty, or contains a null polygon.
   */
  LatLonDocValuesPointInPolygonQuery(String field, Polygon... polygons) {
    if (field == null) {
      throw new IllegalArgumentException("field must not be null");
    }
    if (polygons == null) {
      throw new IllegalArgumentException("polygons must not be null");
    }
    // Defensive copy: a varargs parameter can be backed by a caller-owned array. Keeping a
    // reference to it would let later mutations change equals()/hashCode()/toString() and
    // the matching behaviour of an already constructed query.
    final Polygon[] copy = polygons.clone();
    if (copy.length == 0) {
      throw new IllegalArgumentException("polygons must not be empty");
    }
    for (int i = 0; i < copy.length; i++) {
      if (copy[i] == null) {
        throw new IllegalArgumentException("polygon[" + i + "] must not be null");
      }
    }
    this.field = field;
    this.polygons = copy;
  }

  /**
   * Renders the query as {@code polygons([...])}, prefixed with {@code <field>:} when this
   * query's field differs from the given default field.
   */
  @Override
  public String toString(String field) {
    StringBuilder sb = new StringBuilder();
    if (!this.field.equals(field)) {
      sb.append(this.field);
      sb.append(':');
    }
    sb.append("polygons(").append(Arrays.toString(polygons));
    return sb.append(")").toString();
  }

  /** Two queries are equal when they have the same class, the same field and equal polygon arrays. */
  @Override
  public boolean equals(Object obj) {
    if (sameClassAs(obj) == false) {
      return false;
    }
    LatLonDocValuesPointInPolygonQuery other = (LatLonDocValuesPointInPolygonQuery) obj;
    return field.equals(other.field) &&
           Arrays.equals(polygons, other.polygons);
  }

  /** Hash code combining the class hash, the field and the polygon array; consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    int h = classHash();
    h = 31 * h + field.hashCode();
    h = 31 * h + Arrays.hashCode(polygons);
    return h;
  }

  /** Reports this query as a leaf to the visitor, but only if the visitor accepts this query's field. */
  @Override
  public void visit(QueryVisitor visitor) {
    if (visitor.acceptField(field)) {
      visitor.visitLeaf(this);
    }
  }

  /**
   * Creates a constant-score weight whose scorers verify documents one by one against the
   * polygons using the field's doc values.
   */
  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {

    return new ConstantScoreWeight(this, boost) {

      // Built once per weight and shared by the scorers of all segments.
      final Polygon2D tree = Polygon2D.create(polygons);
      final GeoEncodingUtils.PolygonPredicate polygonPredicate = GeoEncodingUtils.createPolygonPredicate(polygons, tree);

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        final SortedNumericDocValues values = context.reader().getSortedNumericDocValues(field);
        if (values == null) {
          // No doc values for this field in this segment: nothing can match.
          return null;
        }

        // The doc-values iterator is the approximation; matches() confirms each candidate.
        final TwoPhaseIterator iterator = new TwoPhaseIterator(values) {

          @Override
          public boolean matches() throws IOException {
            for (int i = 0, count = values.docValueCount(); i < count; ++i) {
              final long value = values.nextValue();
              // Each value packs two ints: the high 32 bits are read as the latitude and
              // the low 32 bits as the longitude.
              final int lat = (int) (value >>> 32);
              // Use a long mask: the int literal 0xFFFFFFFF is -1 and would widen to an
              // all-ones long, turning the mask into a no-op.
              final int lon = (int) (value & 0xFFFFFFFFL);
              if (polygonPredicate.test(lat, lon)) {
                return true;
              }
            }
            return false;
          }

          @Override
          public float matchCost() {
            return 1000f; // TODO: what should it be?
          }
        };
        return new ConstantScoreScorer(this, boost, scoreMode, iterator);
      }

      @Override
      public boolean isCacheable(LeafReaderContext ctx) {
        return DocValues.isCacheable(ctx, field);
      }

    };
  }
}

View File

@ -37,6 +37,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FutureArrays;
@ -84,12 +85,63 @@ final class LatLonPointInPolygonQuery extends Query {
}
}
/**
 * Builds the visitor used to intersect the points of a segment with the polygons.
 * Matching doc ids are added to {@code result}; cells are first checked against the
 * encoded bounding box ({@code minLat}/{@code maxLat}/{@code minLon}/{@code maxLon})
 * and then related to {@code tree}.
 */
private IntersectVisitor getIntersectVisitor(DocIdSetBuilder result, Polygon2D tree, GeoEncodingUtils.PolygonPredicate polygonPredicate,
                                             byte[] minLat, byte[] maxLat, byte[] minLon, byte[] maxLon) {
  return new IntersectVisitor() {

    // Assigned by grow(int) before doc ids are added.
    DocIdSetBuilder.BulkAdder adder;

    @Override
    public void grow(int count) {
      adder = result.grow(count);
    }

    @Override
    public void visit(int docID) {
      adder.add(docID);
    }

    @Override
    public void visit(int docID, byte[] packedValue) {
      final int encodedLat = NumericUtils.sortableBytesToInt(packedValue, 0);
      final int encodedLon = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
      if (polygonPredicate.test(encodedLat, encodedLon)) {
        visit(docID);
      }
    }

    @Override
    public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException {
      final int encodedLat = NumericUtils.sortableBytesToInt(packedValue, 0);
      final int encodedLon = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
      if (polygonPredicate.test(encodedLat, encodedLon) == false) {
        // The shared point is rejected, so none of the docs is collected.
        return;
      }
      for (int docID = iterator.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = iterator.nextDoc()) {
        visit(docID);
      }
    }

    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      // Bounding-box rejection on the encoded bytes: latitude occupies the first
      // Integer.BYTES bytes of a packed value, longitude the following Integer.BYTES bytes.
      if (FutureArrays.compareUnsigned(minPackedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0) {
        return Relation.CELL_OUTSIDE_QUERY;
      }
      if (FutureArrays.compareUnsigned(maxPackedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES) < 0) {
        return Relation.CELL_OUTSIDE_QUERY;
      }
      if (FutureArrays.compareUnsigned(minPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, maxLon, 0, Integer.BYTES) > 0) {
        return Relation.CELL_OUTSIDE_QUERY;
      }
      if (FutureArrays.compareUnsigned(maxPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon, 0, Integer.BYTES) < 0) {
        return Relation.CELL_OUTSIDE_QUERY;
      }

      // Inside the bounding box: decode the cell corners and relate the cell to the polygon tree.
      final double cellMinLat = decodeLatitude(minPackedValue, 0);
      final double cellMinLon = decodeLongitude(minPackedValue, Integer.BYTES);
      final double cellMaxLat = decodeLatitude(maxPackedValue, 0);
      final double cellMaxLon = decodeLongitude(maxPackedValue, Integer.BYTES);
      return tree.relate(cellMinLat, cellMaxLat, cellMinLon, cellMaxLon);
    }
  };
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
// I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
// used in the first pass:
// bounding box over all polygons, this can speed up tree intersection/cheaply improve approximation for complex multi-polygons
// these are pre-encoded with LatLonPoint's encoding
final Rectangle box = Rectangle.fromPolygon(polygons);
@ -108,7 +160,7 @@ final class LatLonPointInPolygonQuery extends Query {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
PointValues values = reader.getPointValues(field);
if (values == null) {
@ -121,64 +173,39 @@ final class LatLonPointInPolygonQuery extends Query {
return null;
}
LatLonPoint.checkCompatible(fieldInfo);
final Weight weight = this;
// matching docids
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
return new ScorerSupplier() {
values.intersect(
new IntersectVisitor() {
long cost = -1;
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
final IntersectVisitor visitor = getIntersectVisitor(result, tree, polygonPredicate, minLat, maxLat, minLon, maxLon);
DocIdSetBuilder.BulkAdder adder;
@Override
public Scorer get(long leadCost) throws IOException {
values.intersect(visitor);
return new ConstantScoreScorer(weight, score(), scoreMode, result.build().iterator());
}
@Override
public void grow(int count) {
adder = result.grow(count);
}
@Override
public long cost() {
if (cost == -1) {
// Computing the cost may be expensive, so only do it if necessary
cost = values.estimatePointCount(visitor);
assert cost >= 0;
}
return cost;
}
};
}
@Override
public void visit(int docID) {
adder.add(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
if (polygonPredicate.test(NumericUtils.sortableBytesToInt(packedValue, 0),
NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES))) {
visit(docID);
}
}
@Override
public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException {
if (polygonPredicate.test(NumericUtils.sortableBytesToInt(packedValue, 0),
NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES))) {
int docID;
while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
visit(docID);
}
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (FutureArrays.compareUnsigned(minPackedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0 ||
FutureArrays.compareUnsigned(maxPackedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES) < 0 ||
FutureArrays.compareUnsigned(minPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, maxLon, 0, Integer.BYTES) > 0 ||
FutureArrays.compareUnsigned(maxPackedValue, Integer.BYTES, Integer.BYTES + Integer.BYTES, minLon, 0, Integer.BYTES) < 0) {
// outside of global bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}
double cellMinLat = decodeLatitude(minPackedValue, 0);
double cellMinLon = decodeLongitude(minPackedValue, Integer.BYTES);
double cellMaxLat = decodeLatitude(maxPackedValue, 0);
double cellMaxLon = decodeLongitude(maxPackedValue, Integer.BYTES);
return tree.relate(cellMinLat, cellMaxLat, cellMinLon, cellMaxLon);
}
});
return new ConstantScoreScorer(this, score(), scoreMode, result.build().iterator());
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(Long.MAX_VALUE);
}
@Override
@ -186,6 +213,7 @@ final class LatLonPointInPolygonQuery extends Query {
return true;
}
};
}
/** Returns the query field */

View File

@ -26,7 +26,7 @@ public class TestLatLonDocValuesQueries extends BaseGeoPointTestCase {
@Override
protected boolean supportsPolygons() {
return false;
return true;
}
@Override
@ -46,8 +46,7 @@ public class TestLatLonDocValuesQueries extends BaseGeoPointTestCase {
@Override
protected Query newPolygonQuery(String field, Polygon... polygons) {
fail();
return null;
return LatLonDocValuesField.newSlowPolygonQuery(field, polygons);
}
@Override