Do int compare instead of ArrayUtil#compareUnsigned4 in LatlonPointQueries (#12006)

This commit is contained in:
gf2121 2022-12-11 02:30:17 +08:00 committed by GitHub
parent 9ff989ec00
commit 54e00df7f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 77 additions and 108 deletions

View File

@ -213,6 +213,8 @@ Optimizations
* GITHUB#11972: `IndexSortSortedNumericDocValuesRangeQuery` can now also
optimize query execution with points for descending sorts. (Adrien Grand)
* GITHUB#12006: Do ints compare instead of ArrayUtil#compareUnsigned4 in LatlonPointQueries. (Guo Feng)
Other
---------------------

View File

@ -37,7 +37,6 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.SloppyMath;
@ -387,17 +386,12 @@ final class LatLonPointDistanceFeatureQuery extends Query {
// Ideally we would be doing a distance query but that is too expensive so we approximate
// with a box query which performs better.
Rectangle box = Rectangle.fromPointDistance(originLat, originLon, maxDistance);
final byte[] minLat = new byte[LatLonPoint.BYTES];
final byte[] maxLat = new byte[LatLonPoint.BYTES];
final byte[] minLon = new byte[LatLonPoint.BYTES];
final byte[] maxLon = new byte[LatLonPoint.BYTES];
final int minLat = GeoEncodingUtils.encodeLatitude(box.minLat);
final int maxLat = GeoEncodingUtils.encodeLatitude(box.maxLat);
final int minLon = GeoEncodingUtils.encodeLongitude(box.minLon);
final int maxLon = GeoEncodingUtils.encodeLongitude(box.maxLon);
final boolean crossDateLine = box.crossesDateline();
NumericUtils.intToSortableBytes(GeoEncodingUtils.encodeLatitude(box.minLat), minLat, 0);
NumericUtils.intToSortableBytes(GeoEncodingUtils.encodeLatitude(box.maxLat), maxLat, 0);
NumericUtils.intToSortableBytes(GeoEncodingUtils.encodeLongitude(box.minLon), minLon, 0);
NumericUtils.intToSortableBytes(GeoEncodingUtils.encodeLongitude(box.maxLon), maxLon, 0);
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
final int doc = docID();
IntersectVisitor visitor =
@ -425,21 +419,20 @@ final class LatLonPointDistanceFeatureQuery extends Query {
// Already visited or skipped
return;
}
if (ArrayUtil.compareUnsigned4(packedValue, 0, maxLat, 0) > 0
|| ArrayUtil.compareUnsigned4(packedValue, 0, minLat, 0) < 0) {
int lat = NumericUtils.sortableBytesToInt(packedValue, 0);
if (lat > maxLat || lat < minLat) {
// Latitude out of range
return;
}
int lon = NumericUtils.sortableBytesToInt(packedValue, LatLonPoint.BYTES);
if (crossDateLine) {
if (ArrayUtil.compareUnsigned4(packedValue, LatLonPoint.BYTES, minLon, 0) < 0
&& ArrayUtil.compareUnsigned4(packedValue, LatLonPoint.BYTES, maxLon, 0) > 0) {
if (lon < minLon && lon > maxLon) {
// Longitude out of range
return;
}
} else {
if (ArrayUtil.compareUnsigned4(packedValue, LatLonPoint.BYTES, maxLon, 0) > 0
|| ArrayUtil.compareUnsigned4(packedValue, LatLonPoint.BYTES, minLon, 0) < 0) {
if (lon > maxLon || lon < minLon) {
// Longitude out of range
return;
}
@ -449,36 +442,27 @@ final class LatLonPointDistanceFeatureQuery extends Query {
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (ArrayUtil.compareUnsigned4(minPackedValue, 0, maxLat, 0) > 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, 0, minLat, 0) < 0) {
int latLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, 0);
int latUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, 0);
if (latLowerBound > maxLat || latUpperBound < minLat) {
return Relation.CELL_OUTSIDE_QUERY;
}
boolean crosses =
ArrayUtil.compareUnsigned4(minPackedValue, 0, minLat, 0) < 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, 0, maxLat, 0) > 0;
boolean crosses = latLowerBound < minLat || latUpperBound > maxLat;
int lonLowerBound =
NumericUtils.sortableBytesToInt(minPackedValue, LatLonPoint.BYTES);
int lonUpperBound =
NumericUtils.sortableBytesToInt(maxPackedValue, LatLonPoint.BYTES);
if (crossDateLine) {
if (ArrayUtil.compareUnsigned4(minPackedValue, LatLonPoint.BYTES, maxLon, 0) > 0
&& ArrayUtil.compareUnsigned4(maxPackedValue, LatLonPoint.BYTES, minLon, 0)
< 0) {
if (lonLowerBound > maxLon && lonUpperBound < minLon) {
return Relation.CELL_OUTSIDE_QUERY;
}
crosses |=
ArrayUtil.compareUnsigned4(minPackedValue, LatLonPoint.BYTES, maxLon, 0) < 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, LatLonPoint.BYTES, minLon, 0)
> 0;
crosses |= lonLowerBound < maxLon || lonUpperBound > minLon;
} else {
if (ArrayUtil.compareUnsigned4(minPackedValue, LatLonPoint.BYTES, maxLon, 0) > 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, LatLonPoint.BYTES, minLon, 0)
< 0) {
if (lonLowerBound > maxLon || lonUpperBound < minLon) {
return Relation.CELL_OUTSIDE_QUERY;
}
crosses |=
ArrayUtil.compareUnsigned4(minPackedValue, LatLonPoint.BYTES, minLon, 0) < 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, LatLonPoint.BYTES, maxLon, 0)
> 0;
crosses |= lonLowerBound < minLon || lonUpperBound > maxLon;
}
if (crosses) {
return Relation.CELL_CROSSES_QUERY;

View File

@ -22,7 +22,6 @@ import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.geo.Rectangle;
@ -42,7 +41,6 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
@ -84,28 +82,25 @@ final class LatLonPointDistanceQuery extends Query {
Rectangle box = Rectangle.fromPointDistance(latitude, longitude, radiusMeters);
// create bounding box(es) for the distance range
// these are pre-encoded with LatLonPoint's encoding
final byte[] minLat = new byte[Integer.BYTES];
final byte[] maxLat = new byte[Integer.BYTES];
final byte[] minLon = new byte[Integer.BYTES];
final byte[] maxLon = new byte[Integer.BYTES];
final int minLat = encodeLatitude(box.minLat);
final int maxLat = encodeLatitude(box.maxLat);
int minLon;
int maxLon;
// second set of longitude ranges to check (for cross-dateline case)
final byte[] minLon2 = new byte[Integer.BYTES];
NumericUtils.intToSortableBytes(encodeLatitude(box.minLat), minLat, 0);
NumericUtils.intToSortableBytes(encodeLatitude(box.maxLat), maxLat, 0);
int minLon2;
// crosses dateline: split
if (box.crossesDateline()) {
// box1
NumericUtils.intToSortableBytes(Integer.MIN_VALUE, minLon, 0);
NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
minLon = Integer.MIN_VALUE;
maxLon = encodeLongitude(box.maxLon);
// box2
NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon2, 0);
minLon2 = encodeLongitude(box.minLon);
} else {
NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon, 0);
NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0);
minLon = encodeLongitude(box.minLon);
maxLon = encodeLongitude(box.maxLon);
// disable box2
NumericUtils.intToSortableBytes(Integer.MAX_VALUE, minLon2, 0);
minLon2 = Integer.MAX_VALUE;
}
// compute exact sort key: avoid any asin() computations
@ -187,26 +182,18 @@ final class LatLonPointDistanceQuery extends Query {
}
private boolean matches(byte[] packedValue) {
int lat = NumericUtils.sortableBytesToInt(packedValue, 0);
// bounding box check
if (ArrayUtil.compareUnsigned4(packedValue, 0, maxLat, 0) > 0
|| ArrayUtil.compareUnsigned4(packedValue, 0, minLat, 0) < 0) {
if (lat > maxLat || lat < minLat) {
// latitude out of bounding box range
return false;
}
if ((ArrayUtil.compareUnsigned4(packedValue, Integer.BYTES, maxLon, 0) > 0
|| ArrayUtil.compareUnsigned4(packedValue, Integer.BYTES, minLon, 0) < 0)
&& ArrayUtil.compareUnsigned4(packedValue, Integer.BYTES, minLon2, 0) < 0) {
int lon = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
if ((lon > maxLon || lon < minLon) && lon < minLon2) {
// longitude out of bounding box range
return false;
}
int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0);
int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
if (distancePredicate.test(docLatitude, docLongitude)) {
return true;
}
return false;
return distancePredicate.test(lat, lon);
}
// algorithm: we create a bounding box (two bounding boxes if we cross the dateline).
@ -217,24 +204,24 @@ final class LatLonPointDistanceQuery extends Query {
// wrapping half way around the world, etc: then this can't work, just go to step 4.
// 4. recurse naively (subtrees crossing over circle edge)
private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) {
if (Arrays.compareUnsigned(minPackedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0
|| Arrays.compareUnsigned(maxPackedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES)
< 0) {
int latLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, 0);
int latUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, 0);
if (latLowerBound > maxLat || latUpperBound < minLat) {
// latitude out of bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}
if ((ArrayUtil.compareUnsigned4(minPackedValue, Integer.BYTES, maxLon, 0) > 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, Integer.BYTES, minLon, 0) < 0)
&& ArrayUtil.compareUnsigned4(maxPackedValue, Integer.BYTES, minLon2, 0) < 0) {
int lonLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, LatLonPoint.BYTES);
int lonUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, LatLonPoint.BYTES);
if ((lonLowerBound > maxLon || lonUpperBound < minLon) && lonUpperBound < minLon2) {
// longitude out of bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}
double latMin = decodeLatitude(minPackedValue, 0);
double lonMin = decodeLongitude(minPackedValue, Integer.BYTES);
double latMax = decodeLatitude(maxPackedValue, 0);
double lonMax = decodeLongitude(maxPackedValue, Integer.BYTES);
double latMin = decodeLatitude(latLowerBound);
double lonMin = decodeLongitude(lonLowerBound);
double latMax = decodeLatitude(latUpperBound);
double lonMax = decodeLongitude(lonUpperBound);
return GeoUtils.relate(
latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat);

View File

@ -32,7 +32,6 @@ import org.apache.lucene.geo.LatLonGeometry;
import org.apache.lucene.geo.Line;
import org.apache.lucene.geo.Point;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.NumericUtils;
/**
@ -90,31 +89,33 @@ final class LatLonPointQuery extends SpatialQuery {
GeoEncodingUtils.createComponentPredicate(queryComponent2D);
// bounding box over all geometries, this can speed up tree intersection/cheaply improve
// approximation for complex multi-geometries
final byte[] minLat = new byte[Integer.BYTES];
final byte[] maxLat = new byte[Integer.BYTES];
final byte[] minLon = new byte[Integer.BYTES];
final byte[] maxLon = new byte[Integer.BYTES];
NumericUtils.intToSortableBytes(encodeLatitude(queryComponent2D.getMinY()), minLat, 0);
NumericUtils.intToSortableBytes(encodeLatitude(queryComponent2D.getMaxY()), maxLat, 0);
NumericUtils.intToSortableBytes(encodeLongitude(queryComponent2D.getMinX()), minLon, 0);
NumericUtils.intToSortableBytes(encodeLongitude(queryComponent2D.getMaxX()), maxLon, 0);
final int minLat = encodeLatitude(queryComponent2D.getMinY());
final int maxLat = encodeLatitude(queryComponent2D.getMaxY());
final int minLon = encodeLongitude(queryComponent2D.getMinX());
final int maxLon = encodeLongitude(queryComponent2D.getMaxX());
return new SpatialVisitor() {
@Override
protected Relation relate(byte[] minPackedValue, byte[] maxPackedValue) {
if (ArrayUtil.compareUnsigned4(minPackedValue, 0, maxLat, 0) > 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, 0, minLat, 0) < 0
|| ArrayUtil.compareUnsigned4(minPackedValue, Integer.BYTES, maxLon, 0) > 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, Integer.BYTES, minLon, 0) < 0) {
int latLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, 0);
int latUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, 0);
if (latLowerBound > maxLat || latUpperBound < minLat) {
// outside of global bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}
double cellMinLat = decodeLatitude(minPackedValue, 0);
double cellMinLon = decodeLongitude(minPackedValue, Integer.BYTES);
double cellMaxLat = decodeLatitude(maxPackedValue, 0);
double cellMaxLon = decodeLongitude(maxPackedValue, Integer.BYTES);
int lonLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, LatLonPoint.BYTES);
int lonUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, LatLonPoint.BYTES);
if (lonLowerBound > maxLon || lonUpperBound < minLon) {
// outside of global bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}
double cellMinLat = decodeLatitude(latLowerBound);
double cellMinLon = decodeLongitude(lonLowerBound);
double cellMaxLat = decodeLatitude(latUpperBound);
double cellMaxLon = decodeLongitude(lonUpperBound);
return queryComponent2D.relate(cellMinLon, cellMaxLon, cellMinLat, cellMaxLat);
}

View File

@ -34,8 +34,8 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.NumericUtils;
final class LongDistanceFeatureQuery extends Query {
@ -378,11 +378,8 @@ final class LongDistanceFeatureQuery extends Query {
// overflow
maxValue = Long.MAX_VALUE;
}
final byte[] minValueAsBytes = new byte[Long.BYTES];
LongPoint.encodeDimension(minValue, minValueAsBytes, 0);
final byte[] maxValueAsBytes = new byte[Long.BYTES];
LongPoint.encodeDimension(maxValue, maxValueAsBytes, 0);
long min = minValue;
long max = maxValue;
DocIdSetBuilder result = new DocIdSetBuilder(maxDoc);
final int doc = docID();
@ -411,14 +408,11 @@ final class LongDistanceFeatureQuery extends Query {
// Already visited or skipped
return;
}
if (ArrayUtil.compareUnsigned8(packedValue, 0, minValueAsBytes, 0) < 0) {
long docValue = NumericUtils.sortableBytesToLong(packedValue, 0);
if (docValue < min || docValue > max) {
// Doc's value is too low, in this dimension
return;
}
if (ArrayUtil.compareUnsigned8(packedValue, 0, maxValueAsBytes, 0) > 0) {
// Doc's value is too high, in this dimension
return;
}
// Doc is in-bounds
adder.add(docID);
@ -426,13 +420,14 @@ final class LongDistanceFeatureQuery extends Query {
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (ArrayUtil.compareUnsigned8(minPackedValue, 0, maxValueAsBytes, 0) > 0
|| ArrayUtil.compareUnsigned8(maxPackedValue, 0, minValueAsBytes, 0) < 0) {
long minDocValue = NumericUtils.sortableBytesToLong(minPackedValue, 0);
long maxDocValue = NumericUtils.sortableBytesToLong(maxPackedValue, 0);
if (minDocValue > max || maxDocValue < min) {
return Relation.CELL_OUTSIDE_QUERY;
}
if (ArrayUtil.compareUnsigned8(minPackedValue, 0, minValueAsBytes, 0) < 0
|| ArrayUtil.compareUnsigned8(maxPackedValue, 0, maxValueAsBytes, 0) > 0) {
if (minDocValue < min || maxDocValue > max) {
return Relation.CELL_CROSSES_QUERY;
}