Do int compare instead of ArrayUtil#compareUnsigned4 in LatlonPointQueries (#12006)

This commit is contained in:
gf2121 2022-12-11 02:30:17 +08:00 committed by GitHub
parent 9ff989ec00
commit 54e00df7f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 77 additions and 108 deletions

View File

@ -213,6 +213,8 @@ Optimizations
* GITHUB#11972: `IndexSortSortedNumericDocValuesRangeQuery` can now also * GITHUB#11972: `IndexSortSortedNumericDocValuesRangeQuery` can now also
optimize query execution with points for descending sorts. (Adrien Grand) optimize query execution with points for descending sorts. (Adrien Grand)
* GITHUB#12006: Compare ints directly instead of calling ArrayUtil#compareUnsigned4 in LatLonPoint queries. (Guo Feng)
Other Other
--------------------- ---------------------

View File

@ -37,7 +37,6 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.SloppyMath; import org.apache.lucene.util.SloppyMath;
@ -387,17 +386,12 @@ final class LatLonPointDistanceFeatureQuery extends Query {
// Ideally we would be doing a distance query but that is too expensive so we approximate // Ideally we would be doing a distance query but that is too expensive so we approximate
// with a box query which performs better. // with a box query which performs better.
Rectangle box = Rectangle.fromPointDistance(originLat, originLon, maxDistance); Rectangle box = Rectangle.fromPointDistance(originLat, originLon, maxDistance);
final byte[] minLat = new byte[LatLonPoint.BYTES]; final int minLat = GeoEncodingUtils.encodeLatitude(box.minLat);
final byte[] maxLat = new byte[LatLonPoint.BYTES]; final int maxLat = GeoEncodingUtils.encodeLatitude(box.maxLat);
final byte[] minLon = new byte[LatLonPoint.BYTES]; final int minLon = GeoEncodingUtils.encodeLongitude(box.minLon);
final byte[] maxLon = new byte[LatLonPoint.BYTES]; final int maxLon = GeoEncodingUtils.encodeLongitude(box.maxLon);
final boolean crossDateLine = box.crossesDateline(); final boolean crossDateLine = box.crossesDateline();
NumericUtils.intToSortableBytes(GeoEncodingUtils.encodeLatitude(box.minLat), minLat, 0);
NumericUtils.intToSortableBytes(GeoEncodingUtils.encodeLatitude(box.maxLat), maxLat, 0);
NumericUtils.intToSortableBytes(GeoEncodingUtils.encodeLongitude(box.minLon), minLon, 0);
NumericUtils.intToSortableBytes(GeoEncodingUtils.encodeLongitude(box.maxLon), maxLon, 0);
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc); DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
final int doc = docID(); final int doc = docID();
IntersectVisitor visitor = IntersectVisitor visitor =
@ -425,21 +419,20 @@ final class LatLonPointDistanceFeatureQuery extends Query {
// Already visited or skipped // Already visited or skipped
return; return;
} }
if (ArrayUtil.compareUnsigned4(packedValue, 0, maxLat, 0) > 0 int lat = NumericUtils.sortableBytesToInt(packedValue, 0);
|| ArrayUtil.compareUnsigned4(packedValue, 0, minLat, 0) < 0) { if (lat > maxLat || lat < minLat) {
// Latitude out of range // Latitude out of range
return; return;
} }
int lon = NumericUtils.sortableBytesToInt(packedValue, LatLonPoint.BYTES);
if (crossDateLine) { if (crossDateLine) {
if (ArrayUtil.compareUnsigned4(packedValue, LatLonPoint.BYTES, minLon, 0) < 0 if (lon < minLon && lon > maxLon) {
&& ArrayUtil.compareUnsigned4(packedValue, LatLonPoint.BYTES, maxLon, 0) > 0) {
// Longitude out of range // Longitude out of range
return; return;
} }
} else { } else {
if (ArrayUtil.compareUnsigned4(packedValue, LatLonPoint.BYTES, maxLon, 0) > 0 if (lon > maxLon || lon < minLon) {
|| ArrayUtil.compareUnsigned4(packedValue, LatLonPoint.BYTES, minLon, 0) < 0) {
// Longitude out of range // Longitude out of range
return; return;
} }
@ -449,36 +442,27 @@ final class LatLonPointDistanceFeatureQuery extends Query {
@Override @Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
int latLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, 0);
if (ArrayUtil.compareUnsigned4(minPackedValue, 0, maxLat, 0) > 0 int latUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, 0);
|| ArrayUtil.compareUnsigned4(maxPackedValue, 0, minLat, 0) < 0) { if (latLowerBound > maxLat || latUpperBound < minLat) {
return Relation.CELL_OUTSIDE_QUERY; return Relation.CELL_OUTSIDE_QUERY;
} }
boolean crosses = boolean crosses = latLowerBound < minLat || latUpperBound > maxLat;
ArrayUtil.compareUnsigned4(minPackedValue, 0, minLat, 0) < 0 int lonLowerBound =
|| ArrayUtil.compareUnsigned4(maxPackedValue, 0, maxLat, 0) > 0; NumericUtils.sortableBytesToInt(minPackedValue, LatLonPoint.BYTES);
int lonUpperBound =
NumericUtils.sortableBytesToInt(maxPackedValue, LatLonPoint.BYTES);
if (crossDateLine) { if (crossDateLine) {
if (ArrayUtil.compareUnsigned4(minPackedValue, LatLonPoint.BYTES, maxLon, 0) > 0 if (lonLowerBound > maxLon && lonUpperBound < minLon) {
&& ArrayUtil.compareUnsigned4(maxPackedValue, LatLonPoint.BYTES, minLon, 0)
< 0) {
return Relation.CELL_OUTSIDE_QUERY; return Relation.CELL_OUTSIDE_QUERY;
} }
crosses |= crosses |= lonLowerBound < maxLon || lonUpperBound > minLon;
ArrayUtil.compareUnsigned4(minPackedValue, LatLonPoint.BYTES, maxLon, 0) < 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, LatLonPoint.BYTES, minLon, 0)
> 0;
} else { } else {
if (ArrayUtil.compareUnsigned4(minPackedValue, LatLonPoint.BYTES, maxLon, 0) > 0 if (lonLowerBound > maxLon || lonUpperBound < minLon) {
|| ArrayUtil.compareUnsigned4(maxPackedValue, LatLonPoint.BYTES, minLon, 0)
< 0) {
return Relation.CELL_OUTSIDE_QUERY; return Relation.CELL_OUTSIDE_QUERY;
} }
crosses |= crosses |= lonLowerBound < minLon || lonUpperBound > maxLon;
ArrayUtil.compareUnsigned4(minPackedValue, LatLonPoint.BYTES, minLon, 0) < 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, LatLonPoint.BYTES, maxLon, 0)
> 0;
} }
if (crosses) { if (crosses) {
return Relation.CELL_CROSSES_QUERY; return Relation.CELL_CROSSES_QUERY;

View File

@ -22,7 +22,6 @@ import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude; import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.geo.GeoEncodingUtils; import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.GeoUtils; import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.geo.Rectangle; import org.apache.lucene.geo.Rectangle;
@ -42,7 +41,6 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
@ -84,28 +82,25 @@ final class LatLonPointDistanceQuery extends Query {
Rectangle box = Rectangle.fromPointDistance(latitude, longitude, radiusMeters); Rectangle box = Rectangle.fromPointDistance(latitude, longitude, radiusMeters);
// create bounding box(es) for the distance range // create bounding box(es) for the distance range
// these are pre-encoded with LatLonPoint's encoding // these are pre-encoded with LatLonPoint's encoding
final byte[] minLat = new byte[Integer.BYTES]; final int minLat = encodeLatitude(box.minLat);
final byte[] maxLat = new byte[Integer.BYTES]; final int maxLat = encodeLatitude(box.maxLat);
final byte[] minLon = new byte[Integer.BYTES]; int minLon;
final byte[] maxLon = new byte[Integer.BYTES]; int maxLon;
// second set of longitude ranges to check (for cross-dateline case) // second set of longitude ranges to check (for cross-dateline case)
final byte[] minLon2 = new byte[Integer.BYTES]; int minLon2;
NumericUtils.intToSortableBytes(encodeLatitude(box.minLat), minLat, 0);
NumericUtils.intToSortableBytes(encodeLatitude(box.maxLat), maxLat, 0);
// crosses dateline: split // crosses dateline: split
if (box.crossesDateline()) { if (box.crossesDateline()) {
// box1 // box1
NumericUtils.intToSortableBytes(Integer.MIN_VALUE, minLon, 0); minLon = Integer.MIN_VALUE;
NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0); maxLon = encodeLongitude(box.maxLon);
// box2 // box2
NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon2, 0); minLon2 = encodeLongitude(box.minLon);
} else { } else {
NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon, 0); minLon = encodeLongitude(box.minLon);
NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0); maxLon = encodeLongitude(box.maxLon);
// disable box2 // disable box2
NumericUtils.intToSortableBytes(Integer.MAX_VALUE, minLon2, 0); minLon2 = Integer.MAX_VALUE;
} }
// compute exact sort key: avoid any asin() computations // compute exact sort key: avoid any asin() computations
@ -187,26 +182,18 @@ final class LatLonPointDistanceQuery extends Query {
} }
private boolean matches(byte[] packedValue) { private boolean matches(byte[] packedValue) {
int lat = NumericUtils.sortableBytesToInt(packedValue, 0);
// bounding box check // bounding box check
if (ArrayUtil.compareUnsigned4(packedValue, 0, maxLat, 0) > 0 if (lat > maxLat || lat < minLat) {
|| ArrayUtil.compareUnsigned4(packedValue, 0, minLat, 0) < 0) {
// latitude out of bounding box range // latitude out of bounding box range
return false; return false;
} }
int lon = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
if ((ArrayUtil.compareUnsigned4(packedValue, Integer.BYTES, maxLon, 0) > 0 if ((lon > maxLon || lon < minLon) && lon < minLon2) {
|| ArrayUtil.compareUnsigned4(packedValue, Integer.BYTES, minLon, 0) < 0)
&& ArrayUtil.compareUnsigned4(packedValue, Integer.BYTES, minLon2, 0) < 0) {
// longitude out of bounding box range // longitude out of bounding box range
return false; return false;
} }
return distancePredicate.test(lat, lon);
int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0);
int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
if (distancePredicate.test(docLatitude, docLongitude)) {
return true;
}
return false;
} }
// algorithm: we create a bounding box (two bounding boxes if we cross the dateline). // algorithm: we create a bounding box (two bounding boxes if we cross the dateline).
@ -217,24 +204,24 @@ final class LatLonPointDistanceQuery extends Query {
// wrapping half way around the world, etc: then this can't work, just go to step 4. // wrapping half way around the world, etc: then this can't work, just go to step 4.
// 4. recurse naively (subtrees crossing over circle edge) // 4. recurse naively (subtrees crossing over circle edge)
private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) {
if (Arrays.compareUnsigned(minPackedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0 int latLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, 0);
|| Arrays.compareUnsigned(maxPackedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES) int latUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, 0);
< 0) { if (latLowerBound > maxLat || latUpperBound < minLat) {
// latitude out of bounding box range // latitude out of bounding box range
return Relation.CELL_OUTSIDE_QUERY; return Relation.CELL_OUTSIDE_QUERY;
} }
if ((ArrayUtil.compareUnsigned4(minPackedValue, Integer.BYTES, maxLon, 0) > 0 int lonLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, LatLonPoint.BYTES);
|| ArrayUtil.compareUnsigned4(maxPackedValue, Integer.BYTES, minLon, 0) < 0) int lonUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, LatLonPoint.BYTES);
&& ArrayUtil.compareUnsigned4(maxPackedValue, Integer.BYTES, minLon2, 0) < 0) { if ((lonLowerBound > maxLon || lonUpperBound < minLon) && lonUpperBound < minLon2) {
// longitude out of bounding box range // longitude out of bounding box range
return Relation.CELL_OUTSIDE_QUERY; return Relation.CELL_OUTSIDE_QUERY;
} }
double latMin = decodeLatitude(minPackedValue, 0); double latMin = decodeLatitude(latLowerBound);
double lonMin = decodeLongitude(minPackedValue, Integer.BYTES); double lonMin = decodeLongitude(lonLowerBound);
double latMax = decodeLatitude(maxPackedValue, 0); double latMax = decodeLatitude(latUpperBound);
double lonMax = decodeLongitude(maxPackedValue, Integer.BYTES); double lonMax = decodeLongitude(lonUpperBound);
return GeoUtils.relate( return GeoUtils.relate(
latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat); latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat);

View File

@ -32,7 +32,6 @@ import org.apache.lucene.geo.LatLonGeometry;
import org.apache.lucene.geo.Line; import org.apache.lucene.geo.Line;
import org.apache.lucene.geo.Point; import org.apache.lucene.geo.Point;
import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
/** /**
@ -90,31 +89,33 @@ final class LatLonPointQuery extends SpatialQuery {
GeoEncodingUtils.createComponentPredicate(queryComponent2D); GeoEncodingUtils.createComponentPredicate(queryComponent2D);
// bounding box over all geometries, this can speed up tree intersection/cheaply improve // bounding box over all geometries, this can speed up tree intersection/cheaply improve
// approximation for complex multi-geometries // approximation for complex multi-geometries
final byte[] minLat = new byte[Integer.BYTES]; final int minLat = encodeLatitude(queryComponent2D.getMinY());
final byte[] maxLat = new byte[Integer.BYTES]; final int maxLat = encodeLatitude(queryComponent2D.getMaxY());
final byte[] minLon = new byte[Integer.BYTES]; final int minLon = encodeLongitude(queryComponent2D.getMinX());
final byte[] maxLon = new byte[Integer.BYTES]; final int maxLon = encodeLongitude(queryComponent2D.getMaxX());
NumericUtils.intToSortableBytes(encodeLatitude(queryComponent2D.getMinY()), minLat, 0);
NumericUtils.intToSortableBytes(encodeLatitude(queryComponent2D.getMaxY()), maxLat, 0);
NumericUtils.intToSortableBytes(encodeLongitude(queryComponent2D.getMinX()), minLon, 0);
NumericUtils.intToSortableBytes(encodeLongitude(queryComponent2D.getMaxX()), maxLon, 0);
return new SpatialVisitor() { return new SpatialVisitor() {
@Override @Override
protected Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { protected Relation relate(byte[] minPackedValue, byte[] maxPackedValue) {
if (ArrayUtil.compareUnsigned4(minPackedValue, 0, maxLat, 0) > 0 int latLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, 0);
|| ArrayUtil.compareUnsigned4(maxPackedValue, 0, minLat, 0) < 0 int latUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, 0);
|| ArrayUtil.compareUnsigned4(minPackedValue, Integer.BYTES, maxLon, 0) > 0 if (latLowerBound > maxLat || latUpperBound < minLat) {
|| ArrayUtil.compareUnsigned4(maxPackedValue, Integer.BYTES, minLon, 0) < 0) {
// outside of global bounding box range // outside of global bounding box range
return Relation.CELL_OUTSIDE_QUERY; return Relation.CELL_OUTSIDE_QUERY;
} }
double cellMinLat = decodeLatitude(minPackedValue, 0); int lonLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, LatLonPoint.BYTES);
double cellMinLon = decodeLongitude(minPackedValue, Integer.BYTES); int lonUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, LatLonPoint.BYTES);
double cellMaxLat = decodeLatitude(maxPackedValue, 0); if (lonLowerBound > maxLon || lonUpperBound < minLon) {
double cellMaxLon = decodeLongitude(maxPackedValue, Integer.BYTES); // outside of global bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}
double cellMinLat = decodeLatitude(latLowerBound);
double cellMinLon = decodeLongitude(lonLowerBound);
double cellMaxLat = decodeLatitude(latUpperBound);
double cellMaxLon = decodeLongitude(lonUpperBound);
return queryComponent2D.relate(cellMinLon, cellMaxLon, cellMinLat, cellMaxLat); return queryComponent2D.relate(cellMinLon, cellMaxLon, cellMinLat, cellMaxLat);
} }

View File

@ -34,8 +34,8 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.NumericUtils;
final class LongDistanceFeatureQuery extends Query { final class LongDistanceFeatureQuery extends Query {
@ -378,11 +378,8 @@ final class LongDistanceFeatureQuery extends Query {
// overflow // overflow
maxValue = Long.MAX_VALUE; maxValue = Long.MAX_VALUE;
} }
long min = minValue;
final byte[] minValueAsBytes = new byte[Long.BYTES]; long max = maxValue;
LongPoint.encodeDimension(minValue, minValueAsBytes, 0);
final byte[] maxValueAsBytes = new byte[Long.BYTES];
LongPoint.encodeDimension(maxValue, maxValueAsBytes, 0);
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc); DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
final int doc = docID(); final int doc = docID();
@ -411,14 +408,11 @@ final class LongDistanceFeatureQuery extends Query {
// Already visited or skipped // Already visited or skipped
return; return;
} }
if (ArrayUtil.compareUnsigned8(packedValue, 0, minValueAsBytes, 0) < 0) { long docValue = NumericUtils.sortableBytesToLong(packedValue, 0);
if (docValue < min || docValue > max) {
// Doc's value is too low, in this dimension // Doc's value is out of range (too low or too high), in this dimension
return; return;
} }
if (ArrayUtil.compareUnsigned8(packedValue, 0, maxValueAsBytes, 0) > 0) {
// Doc's value is too high, in this dimension
return;
}
// Doc is in-bounds // Doc is in-bounds
adder.add(docID); adder.add(docID);
@ -426,13 +420,14 @@ final class LongDistanceFeatureQuery extends Query {
@Override @Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (ArrayUtil.compareUnsigned8(minPackedValue, 0, maxValueAsBytes, 0) > 0 long minDocValue = NumericUtils.sortableBytesToLong(minPackedValue, 0);
|| ArrayUtil.compareUnsigned8(maxPackedValue, 0, minValueAsBytes, 0) < 0) { long maxDocValue = NumericUtils.sortableBytesToLong(maxPackedValue, 0);
if (minDocValue > max || maxDocValue < min) {
return Relation.CELL_OUTSIDE_QUERY; return Relation.CELL_OUTSIDE_QUERY;
} }
if (ArrayUtil.compareUnsigned8(minPackedValue, 0, minValueAsBytes, 0) < 0 if (minDocValue < min || maxDocValue > max) {
|| ArrayUtil.compareUnsigned8(maxPackedValue, 0, maxValueAsBytes, 0) > 0) {
return Relation.CELL_CROSSES_QUERY; return Relation.CELL_CROSSES_QUERY;
} }