From 54e00df7f63089243a850bcf29eb20f8009e5dcd Mon Sep 17 00:00:00 2001 From: gf2121 <52390227+gf2121@users.noreply.github.com> Date: Sun, 11 Dec 2022 02:30:17 +0800 Subject: [PATCH] Do int compare instead of ArrayUtil#compareUnsigned4 in LatlonPointQueries (#12006) --- lucene/CHANGES.txt | 2 + .../LatLonPointDistanceFeatureQuery.java | 58 ++++++----------- .../document/LatLonPointDistanceQuery.java | 65 ++++++++----------- .../lucene/document/LatLonPointQuery.java | 35 +++++----- .../document/LongDistanceFeatureQuery.java | 25 +++---- 5 files changed, 77 insertions(+), 108 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 8bc6891efd8..86e7df77a1b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -213,6 +213,8 @@ Optimizations * GITHUB#11972: `IndexSortSortedNumericDocValuesRangeQuery` can now also optimize query execution with points for descending sorts. (Adrien Grand) +* GITHUB#12006: Do ints compare instead of ArrayUtil#compareUnsigned4 in LatlonPointQueries. (Guo Feng) + Other --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceFeatureQuery.java b/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceFeatureQuery.java index beadd92f31c..77c9954baef 100644 --- a/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceFeatureQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceFeatureQuery.java @@ -37,7 +37,6 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.Weight; -import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.SloppyMath; @@ -387,17 +386,12 @@ final class LatLonPointDistanceFeatureQuery extends Query { // Ideally we would be doing a distance query but that is too expensive so we approximate // with a box query which performs better. Rectangle box = Rectangle.fromPointDistance(originLat, originLon, maxDistance); - final byte[] minLat = new byte[LatLonPoint.BYTES]; - final byte[] maxLat = new byte[LatLonPoint.BYTES]; - final byte[] minLon = new byte[LatLonPoint.BYTES]; - final byte[] maxLon = new byte[LatLonPoint.BYTES]; + final int minLat = GeoEncodingUtils.encodeLatitude(box.minLat); + final int maxLat = GeoEncodingUtils.encodeLatitude(box.maxLat); + final int minLon = GeoEncodingUtils.encodeLongitude(box.minLon); + final int maxLon = GeoEncodingUtils.encodeLongitude(box.maxLon); final boolean crossDateLine = box.crossesDateline(); - NumericUtils.intToSortableBytes(GeoEncodingUtils.encodeLatitude(box.minLat), minLat, 0); - NumericUtils.intToSortableBytes(GeoEncodingUtils.encodeLatitude(box.maxLat), maxLat, 0); - NumericUtils.intToSortableBytes(GeoEncodingUtils.encodeLongitude(box.minLon), minLon, 0); - NumericUtils.intToSortableBytes(GeoEncodingUtils.encodeLongitude(box.maxLon), maxLon, 0); - DocIdSetBuilder result = new DocIdSetBuilder(maxDoc); final int doc = docID(); IntersectVisitor visitor = @@ -425,21 +419,20 @@ final class LatLonPointDistanceFeatureQuery extends Query { // Already visited or skipped return; } - if (ArrayUtil.compareUnsigned4(packedValue, 0, maxLat, 0) > 0 - || ArrayUtil.compareUnsigned4(packedValue, 0, minLat, 0) < 0) { + int lat = NumericUtils.sortableBytesToInt(packedValue, 0); + if (lat > maxLat || lat < minLat) { // Latitude out of range return; } + int lon = NumericUtils.sortableBytesToInt(packedValue, LatLonPoint.BYTES); if (crossDateLine) { - if (ArrayUtil.compareUnsigned4(packedValue, LatLonPoint.BYTES, minLon, 0) < 0 - && ArrayUtil.compareUnsigned4(packedValue, LatLonPoint.BYTES, maxLon, 0) > 0) { + if (lon < minLon && lon > maxLon) { // Longitude out of range return; } } else { - if (ArrayUtil.compareUnsigned4(packedValue, LatLonPoint.BYTES, maxLon, 0) > 0 - || ArrayUtil.compareUnsigned4(packedValue, LatLonPoint.BYTES, minLon, 0) < 0) { + if (lon > maxLon || lon < minLon) { // Longitude out of range return; } @@ -449,36 +442,27 @@ final class LatLonPointDistanceFeatureQuery extends Query { @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - - if (ArrayUtil.compareUnsigned4(minPackedValue, 0, maxLat, 0) > 0 - || ArrayUtil.compareUnsigned4(maxPackedValue, 0, minLat, 0) < 0) { + int latLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, 0); + int latUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, 0); + if (latLowerBound > maxLat || latUpperBound < minLat) { return Relation.CELL_OUTSIDE_QUERY; } - boolean crosses = - ArrayUtil.compareUnsigned4(minPackedValue, 0, minLat, 0) < 0 - || ArrayUtil.compareUnsigned4(maxPackedValue, 0, maxLat, 0) > 0; - + boolean crosses = latLowerBound < minLat || latUpperBound > maxLat; + int lonLowerBound = + NumericUtils.sortableBytesToInt(minPackedValue, LatLonPoint.BYTES); + int lonUpperBound = + NumericUtils.sortableBytesToInt(maxPackedValue, LatLonPoint.BYTES); if (crossDateLine) { - if (ArrayUtil.compareUnsigned4(minPackedValue, LatLonPoint.BYTES, maxLon, 0) > 0 - && ArrayUtil.compareUnsigned4(maxPackedValue, LatLonPoint.BYTES, minLon, 0) - < 0) { + if (lonLowerBound > maxLon && lonUpperBound < minLon) { return Relation.CELL_OUTSIDE_QUERY; } - crosses |= - ArrayUtil.compareUnsigned4(minPackedValue, LatLonPoint.BYTES, maxLon, 0) < 0 - || ArrayUtil.compareUnsigned4(maxPackedValue, LatLonPoint.BYTES, minLon, 0) - > 0; + crosses |= lonLowerBound < maxLon || lonUpperBound > minLon; } else { - if (ArrayUtil.compareUnsigned4(minPackedValue, LatLonPoint.BYTES, maxLon, 0) > 0 - || ArrayUtil.compareUnsigned4(maxPackedValue, LatLonPoint.BYTES, minLon, 0) - < 0) { + if (lonLowerBound > maxLon || lonUpperBound < minLon) { return Relation.CELL_OUTSIDE_QUERY; } - crosses |= - ArrayUtil.compareUnsigned4(minPackedValue, LatLonPoint.BYTES, minLon, 0) < 0 - || ArrayUtil.compareUnsigned4(maxPackedValue, LatLonPoint.BYTES, maxLon, 0) - > 0; + crosses |= lonLowerBound < minLon || lonUpperBound > maxLon; } if (crosses) { return Relation.CELL_CROSSES_QUERY; diff --git a/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java index 64eed9cbfc3..9ec9784da6b 100644 --- a/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java @@ -22,7 +22,6 @@ import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude; import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude; import java.io.IOException; -import java.util.Arrays; import org.apache.lucene.geo.GeoEncodingUtils; import org.apache.lucene.geo.GeoUtils; import org.apache.lucene.geo.Rectangle; @@ -42,7 +41,6 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.Weight; -import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.FixedBitSet; @@ -84,28 +82,25 @@ final class LatLonPointDistanceQuery extends Query { Rectangle box = Rectangle.fromPointDistance(latitude, longitude, radiusMeters); // create bounding box(es) for the distance range // these are pre-encoded with LatLonPoint's encoding - final byte[] minLat = new byte[Integer.BYTES]; - final byte[] maxLat = new byte[Integer.BYTES]; - final byte[] minLon = new byte[Integer.BYTES]; - final byte[] maxLon = new byte[Integer.BYTES]; + final int minLat = encodeLatitude(box.minLat); + final int maxLat = encodeLatitude(box.maxLat); + int minLon; + int maxLon; // second set of longitude ranges to check (for cross-dateline case) - final byte[] minLon2 = new byte[Integer.BYTES]; - - NumericUtils.intToSortableBytes(encodeLatitude(box.minLat), minLat, 0); - NumericUtils.intToSortableBytes(encodeLatitude(box.maxLat), maxLat, 0); + int minLon2; // crosses dateline: split if (box.crossesDateline()) { // box1 - NumericUtils.intToSortableBytes(Integer.MIN_VALUE, minLon, 0); - NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0); + minLon = Integer.MIN_VALUE; + maxLon = encodeLongitude(box.maxLon); // box2 - NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon2, 0); + minLon2 = encodeLongitude(box.minLon); } else { - NumericUtils.intToSortableBytes(encodeLongitude(box.minLon), minLon, 0); - NumericUtils.intToSortableBytes(encodeLongitude(box.maxLon), maxLon, 0); + minLon = encodeLongitude(box.minLon); + maxLon = encodeLongitude(box.maxLon); // disable box2 - NumericUtils.intToSortableBytes(Integer.MAX_VALUE, minLon2, 0); + minLon2 = Integer.MAX_VALUE; } // compute exact sort key: avoid any asin() computations @@ -187,26 +182,18 @@ final class LatLonPointDistanceQuery extends Query { } private boolean matches(byte[] packedValue) { + int lat = NumericUtils.sortableBytesToInt(packedValue, 0); // bounding box check - if (ArrayUtil.compareUnsigned4(packedValue, 0, maxLat, 0) > 0 - || ArrayUtil.compareUnsigned4(packedValue, 0, minLat, 0) < 0) { + if (lat > maxLat || lat < minLat) { // latitude out of bounding box range return false; } - - if ((ArrayUtil.compareUnsigned4(packedValue, Integer.BYTES, maxLon, 0) > 0 - || ArrayUtil.compareUnsigned4(packedValue, Integer.BYTES, minLon, 0) < 0) - && ArrayUtil.compareUnsigned4(packedValue, Integer.BYTES, minLon2, 0) < 0) { + int lon = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES); + if ((lon > maxLon || lon < minLon) && lon < minLon2) { // longitude out of bounding box range return false; } - - int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0); - int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES); - if (distancePredicate.test(docLatitude, docLongitude)) { - return true; - } - return false; + return distancePredicate.test(lat, lon); } // algorithm: we create a bounding box (two bounding boxes if we cross the dateline). @@ -217,24 +204,24 @@ final class LatLonPointDistanceQuery extends Query { // wrapping half way around the world, etc: then this can't work, just go to step 4. // 4. recurse naively (subtrees crossing over circle edge) private Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { - if (Arrays.compareUnsigned(minPackedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0 - || Arrays.compareUnsigned(maxPackedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES) - < 0) { + int latLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, 0); + int latUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, 0); + if (latLowerBound > maxLat || latUpperBound < minLat) { // latitude out of bounding box range return Relation.CELL_OUTSIDE_QUERY; } - if ((ArrayUtil.compareUnsigned4(minPackedValue, Integer.BYTES, maxLon, 0) > 0 - || ArrayUtil.compareUnsigned4(maxPackedValue, Integer.BYTES, minLon, 0) < 0) - && ArrayUtil.compareUnsigned4(maxPackedValue, Integer.BYTES, minLon2, 0) < 0) { + int lonLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, LatLonPoint.BYTES); + int lonUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, LatLonPoint.BYTES); + if ((lonLowerBound > maxLon || lonUpperBound < minLon) && lonUpperBound < minLon2) { // longitude out of bounding box range return Relation.CELL_OUTSIDE_QUERY; } - double latMin = decodeLatitude(minPackedValue, 0); - double lonMin = decodeLongitude(minPackedValue, Integer.BYTES); - double latMax = decodeLatitude(maxPackedValue, 0); - double lonMax = decodeLongitude(maxPackedValue, Integer.BYTES); + double latMin = decodeLatitude(latLowerBound); + double lonMin = decodeLongitude(lonLowerBound); + double latMax = decodeLatitude(latUpperBound); + double lonMax = decodeLongitude(lonUpperBound); return GeoUtils.relate( latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat); diff --git a/lucene/core/src/java/org/apache/lucene/document/LatLonPointQuery.java b/lucene/core/src/java/org/apache/lucene/document/LatLonPointQuery.java index 604e61ce484..003e98c7a0d 100644 --- a/lucene/core/src/java/org/apache/lucene/document/LatLonPointQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/LatLonPointQuery.java @@ -32,7 +32,6 @@ import org.apache.lucene.geo.LatLonGeometry; import org.apache.lucene.geo.Line; import org.apache.lucene.geo.Point; import org.apache.lucene.index.PointValues.Relation; -import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.NumericUtils; /** @@ -90,31 +89,33 @@ final class LatLonPointQuery extends SpatialQuery { GeoEncodingUtils.createComponentPredicate(queryComponent2D); // bounding box over all geometries, this can speed up tree intersection/cheaply improve // approximation for complex multi-geometries - final byte[] minLat = new byte[Integer.BYTES]; - final byte[] maxLat = new byte[Integer.BYTES]; - final byte[] minLon = new byte[Integer.BYTES]; - final byte[] maxLon = new byte[Integer.BYTES]; - NumericUtils.intToSortableBytes(encodeLatitude(queryComponent2D.getMinY()), minLat, 0); - NumericUtils.intToSortableBytes(encodeLatitude(queryComponent2D.getMaxY()), maxLat, 0); - NumericUtils.intToSortableBytes(encodeLongitude(queryComponent2D.getMinX()), minLon, 0); - NumericUtils.intToSortableBytes(encodeLongitude(queryComponent2D.getMaxX()), maxLon, 0); + final int minLat = encodeLatitude(queryComponent2D.getMinY()); + final int maxLat = encodeLatitude(queryComponent2D.getMaxY()); + final int minLon = encodeLongitude(queryComponent2D.getMinX()); + final int maxLon = encodeLongitude(queryComponent2D.getMaxX()); return new SpatialVisitor() { @Override protected Relation relate(byte[] minPackedValue, byte[] maxPackedValue) { - if (ArrayUtil.compareUnsigned4(minPackedValue, 0, maxLat, 0) > 0 - || ArrayUtil.compareUnsigned4(maxPackedValue, 0, minLat, 0) < 0 - || ArrayUtil.compareUnsigned4(minPackedValue, Integer.BYTES, maxLon, 0) > 0 - || ArrayUtil.compareUnsigned4(maxPackedValue, Integer.BYTES, minLon, 0) < 0) { + int latLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, 0); + int latUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, 0); + if (latLowerBound > maxLat || latUpperBound < minLat) { // outside of global bounding box range return Relation.CELL_OUTSIDE_QUERY; } - double cellMinLat = decodeLatitude(minPackedValue, 0); - double cellMinLon = decodeLongitude(minPackedValue, Integer.BYTES); - double cellMaxLat = decodeLatitude(maxPackedValue, 0); - double cellMaxLon = decodeLongitude(maxPackedValue, Integer.BYTES); + int lonLowerBound = NumericUtils.sortableBytesToInt(minPackedValue, LatLonPoint.BYTES); + int lonUpperBound = NumericUtils.sortableBytesToInt(maxPackedValue, LatLonPoint.BYTES); + if (lonLowerBound > maxLon || lonUpperBound < minLon) { + // outside of global bounding box range + return Relation.CELL_OUTSIDE_QUERY; + } + + double cellMinLat = decodeLatitude(latLowerBound); + double cellMinLon = decodeLongitude(lonLowerBound); + double cellMaxLat = decodeLatitude(latUpperBound); + double cellMaxLon = decodeLongitude(lonUpperBound); return queryComponent2D.relate(cellMinLon, cellMaxLon, cellMinLat, cellMaxLat); } diff --git a/lucene/core/src/java/org/apache/lucene/document/LongDistanceFeatureQuery.java b/lucene/core/src/java/org/apache/lucene/document/LongDistanceFeatureQuery.java index c9e3a6ec626..7ab4d3cae34 100644 --- a/lucene/core/src/java/org/apache/lucene/document/LongDistanceFeatureQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/LongDistanceFeatureQuery.java @@ -34,8 +34,8 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.Weight; -import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.DocIdSetBuilder; +import org.apache.lucene.util.NumericUtils; final class LongDistanceFeatureQuery extends Query { @@ -378,11 +378,8 @@ final class LongDistanceFeatureQuery extends Query { // overflow maxValue = Long.MAX_VALUE; } - - final byte[] minValueAsBytes = new byte[Long.BYTES]; - LongPoint.encodeDimension(minValue, minValueAsBytes, 0); - final byte[] maxValueAsBytes = new byte[Long.BYTES]; - LongPoint.encodeDimension(maxValue, maxValueAsBytes, 0); + long min = minValue; + long max = maxValue; DocIdSetBuilder result = new DocIdSetBuilder(maxDoc); final int doc = docID(); @@ -411,14 +408,11 @@ final class LongDistanceFeatureQuery extends Query { // Already visited or skipped return; } - if (ArrayUtil.compareUnsigned8(packedValue, 0, minValueAsBytes, 0) < 0) { + long docValue = NumericUtils.sortableBytesToLong(packedValue, 0); + if (docValue < min || docValue > max) { // Doc's value is too low, in this dimension return; } - if (ArrayUtil.compareUnsigned8(packedValue, 0, maxValueAsBytes, 0) > 0) { - // Doc's value is too high, in this dimension - return; - } // Doc is in-bounds adder.add(docID); @@ -426,13 +420,14 @@ final class LongDistanceFeatureQuery extends Query { @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - if (ArrayUtil.compareUnsigned8(minPackedValue, 0, maxValueAsBytes, 0) > 0 - || ArrayUtil.compareUnsigned8(maxPackedValue, 0, minValueAsBytes, 0) < 0) { + long minDocValue = NumericUtils.sortableBytesToLong(minPackedValue, 0); + long maxDocValue = NumericUtils.sortableBytesToLong(maxPackedValue, 0); + + if (minDocValue > max || maxDocValue < min) { return Relation.CELL_OUTSIDE_QUERY; } - if (ArrayUtil.compareUnsigned8(minPackedValue, 0, minValueAsBytes, 0) < 0 - || ArrayUtil.compareUnsigned8(maxPackedValue, 0, maxValueAsBytes, 0) > 0) { + if (minDocValue < min || maxDocValue > max) { return Relation.CELL_CROSSES_QUERY; }