LUCENE-10145: Speed up byte[] comparisons using VarHandles. (#349)

This commit is contained in:
Adrien Grand 2021-10-04 18:35:27 +02:00 committed by GitHub
parent 04fb8c059e
commit 18fc6c1f3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 187 additions and 344 deletions

View File

@ -42,6 +42,7 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
@ -187,37 +188,15 @@ final class LatLonPointDistanceQuery extends Query {
private boolean matches(byte[] packedValue) {
// bounding box check
if (Arrays.compareUnsigned(packedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0
|| Arrays.compareUnsigned(packedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES)
< 0) {
if (ArrayUtil.compareUnsigned4(packedValue, 0, maxLat, 0) > 0
|| ArrayUtil.compareUnsigned4(packedValue, 0, minLat, 0) < 0) {
// latitude out of bounding box range
return false;
}
if ((Arrays.compareUnsigned(
packedValue,
Integer.BYTES,
Integer.BYTES + Integer.BYTES,
maxLon,
0,
Integer.BYTES)
> 0
|| Arrays.compareUnsigned(
packedValue,
Integer.BYTES,
Integer.BYTES + Integer.BYTES,
minLon,
0,
Integer.BYTES)
< 0)
&& Arrays.compareUnsigned(
packedValue,
Integer.BYTES,
Integer.BYTES + Integer.BYTES,
minLon2,
0,
Integer.BYTES)
< 0) {
if ((ArrayUtil.compareUnsigned4(packedValue, Integer.BYTES, maxLon, 0) > 0
|| ArrayUtil.compareUnsigned4(packedValue, Integer.BYTES, minLon, 0) < 0)
&& ArrayUtil.compareUnsigned4(packedValue, Integer.BYTES, minLon2, 0) < 0) {
// longitude out of bounding box range
return false;
}
@ -245,30 +224,9 @@ final class LatLonPointDistanceQuery extends Query {
return Relation.CELL_OUTSIDE_QUERY;
}
if ((Arrays.compareUnsigned(
minPackedValue,
Integer.BYTES,
Integer.BYTES + Integer.BYTES,
maxLon,
0,
Integer.BYTES)
> 0
|| Arrays.compareUnsigned(
maxPackedValue,
Integer.BYTES,
Integer.BYTES + Integer.BYTES,
minLon,
0,
Integer.BYTES)
< 0)
&& Arrays.compareUnsigned(
maxPackedValue,
Integer.BYTES,
Integer.BYTES + Integer.BYTES,
minLon2,
0,
Integer.BYTES)
< 0) {
if ((ArrayUtil.compareUnsigned4(minPackedValue, Integer.BYTES, maxLon, 0) > 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, Integer.BYTES, minLon, 0) < 0)
&& ArrayUtil.compareUnsigned4(maxPackedValue, Integer.BYTES, minLon2, 0) < 0) {
// longitude out of bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}

View File

@ -31,6 +31,7 @@ import org.apache.lucene.geo.LatLonGeometry;
import org.apache.lucene.geo.Line;
import org.apache.lucene.geo.Point;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.NumericUtils;
/**
@ -89,27 +90,13 @@ final class LatLonPointQuery extends SpatialQuery {
NumericUtils.intToSortableBytes(encodeLongitude(component2D.getMaxX()), maxLon, 0);
return new SpatialVisitor() {
@Override
protected Relation relate(byte[] minPackedValue, byte[] maxPackedValue) {
if (Arrays.compareUnsigned(minPackedValue, 0, Integer.BYTES, maxLat, 0, Integer.BYTES) > 0
|| Arrays.compareUnsigned(maxPackedValue, 0, Integer.BYTES, minLat, 0, Integer.BYTES)
< 0
|| Arrays.compareUnsigned(
minPackedValue,
Integer.BYTES,
Integer.BYTES + Integer.BYTES,
maxLon,
0,
Integer.BYTES)
> 0
|| Arrays.compareUnsigned(
maxPackedValue,
Integer.BYTES,
Integer.BYTES + Integer.BYTES,
minLon,
0,
Integer.BYTES)
< 0) {
if (ArrayUtil.compareUnsigned4(minPackedValue, 0, maxLat, 0) > 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, 0, minLat, 0) < 0
|| ArrayUtil.compareUnsigned4(minPackedValue, Integer.BYTES, maxLon, 0) > 0
|| ArrayUtil.compareUnsigned4(maxPackedValue, Integer.BYTES, minLon, 0) < 0) {
// outside of global bounding box range
return Relation.CELL_OUTSIDE_QUERY;
}

View File

@ -24,7 +24,6 @@ import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitudeCeil;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitudeCeil;
import java.util.Arrays;
import java.util.function.Function;
import java.util.function.Predicate;
import org.apache.lucene.document.ShapeField.QueryRelation;
@ -32,6 +31,7 @@ import org.apache.lucene.geo.Component2D;
import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.geo.Rectangle;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.NumericUtils;
/**
@ -310,7 +310,7 @@ final class LatLonShapeBoundingBoxQuery extends SpatialQuery {
* static utility method to compare a bbox with a range of triangles (just the bbox of the
* triangle collection)
*/
private static Relation compareBBoxToRangeBBox(
private Relation compareBBoxToRangeBBox(
final byte[] bbox,
int minXOffset,
int minYOffset,
@ -324,17 +324,10 @@ final class LatLonShapeBoundingBoxQuery extends SpatialQuery {
return Relation.CELL_OUTSIDE_QUERY;
}
if (Arrays.compareUnsigned(
minTriangle, minXOffset, minXOffset + BYTES, bbox, BYTES, 2 * BYTES)
>= 0
&& Arrays.compareUnsigned(
maxTriangle, maxXOffset, maxXOffset + BYTES, bbox, 3 * BYTES, 4 * BYTES)
<= 0
&& Arrays.compareUnsigned(minTriangle, minYOffset, minYOffset + BYTES, bbox, 0, BYTES)
>= 0
&& Arrays.compareUnsigned(
maxTriangle, maxYOffset, maxYOffset + BYTES, bbox, 2 * BYTES, 3 * BYTES)
<= 0) {
if (ArrayUtil.compareUnsigned4(minTriangle, minXOffset, bbox, BYTES) >= 0
&& ArrayUtil.compareUnsigned4(maxTriangle, maxXOffset, bbox, 3 * BYTES) <= 0
&& ArrayUtil.compareUnsigned4(minTriangle, minYOffset, bbox, 0) >= 0
&& ArrayUtil.compareUnsigned4(maxTriangle, maxYOffset, bbox, 2 * BYTES) <= 0) {
return Relation.CELL_INSIDE_QUERY;
}
@ -345,7 +338,7 @@ final class LatLonShapeBoundingBoxQuery extends SpatialQuery {
* static utility method to compare a bbox with a range of triangles (just the bbox of the
* triangle collection) for intersection
*/
private static Relation intersectBBoxWithRangeBBox(
private Relation intersectBBoxWithRangeBBox(
final byte[] bbox,
int minXOffset,
int minYOffset,
@ -359,47 +352,26 @@ final class LatLonShapeBoundingBoxQuery extends SpatialQuery {
return Relation.CELL_OUTSIDE_QUERY;
}
if (Arrays.compareUnsigned(
minTriangle, minXOffset, minXOffset + BYTES, bbox, BYTES, 2 * BYTES)
>= 0
&& Arrays.compareUnsigned(minTriangle, minYOffset, minYOffset + BYTES, bbox, 0, BYTES)
>= 0) {
if (Arrays.compareUnsigned(
maxTriangle, minXOffset, minXOffset + BYTES, bbox, 3 * BYTES, 4 * BYTES)
<= 0
&& Arrays.compareUnsigned(
maxTriangle, maxYOffset, maxYOffset + BYTES, bbox, 2 * BYTES, 3 * BYTES)
<= 0) {
if (ArrayUtil.compareUnsigned4(minTriangle, minXOffset, bbox, BYTES) >= 0
&& ArrayUtil.compareUnsigned4(minTriangle, minYOffset, bbox, 0) >= 0) {
if (ArrayUtil.compareUnsigned4(maxTriangle, minXOffset, bbox, 3 * BYTES) <= 0
&& ArrayUtil.compareUnsigned4(maxTriangle, maxYOffset, bbox, 2 * BYTES) <= 0) {
return Relation.CELL_INSIDE_QUERY;
}
if (Arrays.compareUnsigned(
maxTriangle, maxXOffset, maxXOffset + BYTES, bbox, 3 * BYTES, 4 * BYTES)
<= 0
&& Arrays.compareUnsigned(
maxTriangle, minYOffset, minYOffset + BYTES, bbox, 2 * BYTES, 3 * BYTES)
<= 0) {
if (ArrayUtil.compareUnsigned4(maxTriangle, maxXOffset, bbox, 3 * BYTES) <= 0
&& ArrayUtil.compareUnsigned4(maxTriangle, minYOffset, bbox, 2 * BYTES) <= 0) {
return Relation.CELL_INSIDE_QUERY;
}
}
if (Arrays.compareUnsigned(
maxTriangle, maxXOffset, maxXOffset + BYTES, bbox, 3 * BYTES, 4 * BYTES)
<= 0
&& Arrays.compareUnsigned(
maxTriangle, maxYOffset, maxYOffset + BYTES, bbox, 2 * BYTES, 3 * BYTES)
<= 0) {
if (Arrays.compareUnsigned(
minTriangle, minXOffset, minXOffset + BYTES, bbox, BYTES, 2 * BYTES)
>= 0
&& Arrays.compareUnsigned(minTriangle, maxYOffset, maxYOffset + BYTES, bbox, 0, BYTES)
>= 0) {
if (ArrayUtil.compareUnsigned4(maxTriangle, maxXOffset, bbox, 3 * BYTES) <= 0
&& ArrayUtil.compareUnsigned4(maxTriangle, maxYOffset, bbox, 2 * BYTES) <= 0) {
if (ArrayUtil.compareUnsigned4(minTriangle, minXOffset, bbox, BYTES) >= 0
&& ArrayUtil.compareUnsigned4(minTriangle, maxYOffset, bbox, 0) >= 0) {
return Relation.CELL_INSIDE_QUERY;
}
if (Arrays.compareUnsigned(
minTriangle, maxXOffset, maxXOffset + BYTES, bbox, BYTES, 2 * BYTES)
>= 0
&& Arrays.compareUnsigned(minTriangle, minYOffset, minYOffset + BYTES, bbox, 0, BYTES)
>= 0) {
if (ArrayUtil.compareUnsigned4(minTriangle, maxXOffset, bbox, BYTES) >= 0
&& ArrayUtil.compareUnsigned4(minTriangle, minYOffset, bbox, 0) >= 0) {
return Relation.CELL_INSIDE_QUERY;
}
}
@ -408,7 +380,7 @@ final class LatLonShapeBoundingBoxQuery extends SpatialQuery {
}
/** static utility method to check a bbox is disjoint with a range of triangles */
private static boolean disjoint(
private boolean disjoint(
final byte[] bbox,
int minXOffset,
int minYOffset,
@ -416,17 +388,10 @@ final class LatLonShapeBoundingBoxQuery extends SpatialQuery {
int maxXOffset,
int maxYOffset,
byte[] maxTriangle) {
return Arrays.compareUnsigned(
minTriangle, minXOffset, minXOffset + BYTES, bbox, 3 * BYTES, 4 * BYTES)
> 0
|| Arrays.compareUnsigned(
maxTriangle, maxXOffset, maxXOffset + BYTES, bbox, BYTES, 2 * BYTES)
< 0
|| Arrays.compareUnsigned(
minTriangle, minYOffset, minYOffset + BYTES, bbox, 2 * BYTES, 3 * BYTES)
> 0
|| Arrays.compareUnsigned(maxTriangle, maxYOffset, maxYOffset + BYTES, bbox, 0, BYTES)
< 0;
return ArrayUtil.compareUnsigned4(minTriangle, minXOffset, bbox, 3 * BYTES) > 0
|| ArrayUtil.compareUnsigned4(maxTriangle, maxXOffset, bbox, BYTES) < 0
|| ArrayUtil.compareUnsigned4(minTriangle, minYOffset, bbox, 2 * BYTES) > 0
|| ArrayUtil.compareUnsigned4(maxTriangle, maxYOffset, bbox, 0) < 0;
}
/** Checks if the rectangle contains the provided point */

View File

@ -17,7 +17,6 @@
package org.apache.lucene.document;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
@ -35,6 +34,7 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.DocIdSetBuilder;
final class LongDistanceFeatureQuery extends Query {
@ -411,13 +411,11 @@ final class LongDistanceFeatureQuery extends Query {
// Already visited or skipped
return;
}
if (Arrays.compareUnsigned(packedValue, 0, Long.BYTES, minValueAsBytes, 0, Long.BYTES)
< 0) {
if (ArrayUtil.compareUnsigned8(packedValue, 0, minValueAsBytes, 0) < 0) {
// Doc's value is too low, in this dimension
return;
}
if (Arrays.compareUnsigned(packedValue, 0, Long.BYTES, maxValueAsBytes, 0, Long.BYTES)
> 0) {
if (ArrayUtil.compareUnsigned8(packedValue, 0, maxValueAsBytes, 0) > 0) {
// Doc's value is too high, in this dimension
return;
}
@ -428,21 +426,13 @@ final class LongDistanceFeatureQuery extends Query {
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (Arrays.compareUnsigned(
minPackedValue, 0, Long.BYTES, maxValueAsBytes, 0, Long.BYTES)
> 0
|| Arrays.compareUnsigned(
maxPackedValue, 0, Long.BYTES, minValueAsBytes, 0, Long.BYTES)
< 0) {
if (ArrayUtil.compareUnsigned8(minPackedValue, 0, maxValueAsBytes, 0) > 0
|| ArrayUtil.compareUnsigned8(maxPackedValue, 0, minValueAsBytes, 0) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
if (Arrays.compareUnsigned(
minPackedValue, 0, Long.BYTES, minValueAsBytes, 0, Long.BYTES)
< 0
|| Arrays.compareUnsigned(
maxPackedValue, 0, Long.BYTES, maxValueAsBytes, 0, Long.BYTES)
> 0) {
if (ArrayUtil.compareUnsigned8(minPackedValue, 0, minValueAsBytes, 0) < 0
|| ArrayUtil.compareUnsigned8(maxPackedValue, 0, maxValueAsBytes, 0) > 0) {
return Relation.CELL_CROSSES_QUERY;
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.math.BigInteger;
import java.net.InetAddress;
import java.util.Arrays;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
@ -29,6 +28,8 @@ import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
import org.apache.lucene.util.bkd.BKDConfig;
/**
@ -165,16 +166,11 @@ public abstract class PointValues {
} else {
final int numDimensions = values.getNumIndexDimensions();
final int numBytesPerDimension = values.getBytesPerDimension();
final ByteArrayComparator comparator =
ArrayUtil.getUnsignedComparator(numBytesPerDimension);
for (int i = 0; i < numDimensions; ++i) {
int offset = i * numBytesPerDimension;
if (Arrays.compareUnsigned(
leafMinValue,
offset,
offset + numBytesPerDimension,
minValue,
offset,
offset + numBytesPerDimension)
< 0) {
if (comparator.compare(leafMinValue, offset, minValue, offset) < 0) {
System.arraycopy(leafMinValue, offset, minValue, offset, numBytesPerDimension);
}
}
@ -205,16 +201,11 @@ public abstract class PointValues {
} else {
final int numDimensions = values.getNumIndexDimensions();
final int numBytesPerDimension = values.getBytesPerDimension();
final ByteArrayComparator comparator =
ArrayUtil.getUnsignedComparator(numBytesPerDimension);
for (int i = 0; i < numDimensions; ++i) {
int offset = i * numBytesPerDimension;
if (Arrays.compareUnsigned(
leafMaxValue,
offset,
offset + numBytesPerDimension,
maxValue,
offset,
offset + numBytesPerDimension)
> 0) {
if (comparator.compare(leafMaxValue, offset, maxValue, offset) > 0) {
System.arraycopy(leafMaxValue, offset, maxValue, offset, numBytesPerDimension);
}
}

View File

@ -31,6 +31,8 @@ import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefIterator;
@ -307,11 +309,13 @@ public abstract class PointInSetQuery extends Query implements Accountable {
*/
private class SinglePointVisitor implements IntersectVisitor {
private final ByteArrayComparator comparator;
private final DocIdSetBuilder result;
private final byte[] pointBytes;
private DocIdSetBuilder.BulkAdder adder;
public SinglePointVisitor(DocIdSetBuilder result) {
this.comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
this.result = result;
this.pointBytes = new byte[bytesPerDim * numDims];
}
@ -361,26 +365,12 @@ public abstract class PointInSetQuery extends Query implements Accountable {
for (int dim = 0; dim < numDims; dim++) {
int offset = dim * bytesPerDim;
int cmpMin =
Arrays.compareUnsigned(
minPackedValue,
offset,
offset + bytesPerDim,
pointBytes,
offset,
offset + bytesPerDim);
int cmpMin = comparator.compare(minPackedValue, offset, pointBytes, offset);
if (cmpMin > 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
int cmpMax =
Arrays.compareUnsigned(
maxPackedValue,
offset,
offset + bytesPerDim,
pointBytes,
offset,
offset + bytesPerDim);
int cmpMax = comparator.compare(maxPackedValue, offset, pointBytes, offset);
if (cmpMax < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}

View File

@ -26,6 +26,7 @@ import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
@ -121,28 +122,16 @@ public abstract class PointRangeQuery extends Query {
return new ConstantScoreWeight(this, boost) {
private final ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
private boolean matches(byte[] packedValue) {
for (int dim = 0; dim < numDims; dim++) {
int offset = dim * bytesPerDim;
if (Arrays.compareUnsigned(
packedValue,
offset,
offset + bytesPerDim,
lowerPoint,
offset,
offset + bytesPerDim)
< 0) {
if (comparator.compare(packedValue, offset, lowerPoint, offset) < 0) {
// Doc's value is too low, in this dimension
return false;
}
if (Arrays.compareUnsigned(
packedValue,
offset,
offset + bytesPerDim,
upperPoint,
offset,
offset + bytesPerDim)
> 0) {
if (comparator.compare(packedValue, offset, upperPoint, offset) > 0) {
// Doc's value is too high, in this dimension
return false;
}
@ -157,42 +146,14 @@ public abstract class PointRangeQuery extends Query {
for (int dim = 0; dim < numDims; dim++) {
int offset = dim * bytesPerDim;
if (Arrays.compareUnsigned(
minPackedValue,
offset,
offset + bytesPerDim,
upperPoint,
offset,
offset + bytesPerDim)
> 0
|| Arrays.compareUnsigned(
maxPackedValue,
offset,
offset + bytesPerDim,
lowerPoint,
offset,
offset + bytesPerDim)
< 0) {
if (comparator.compare(minPackedValue, offset, upperPoint, offset) > 0
|| comparator.compare(maxPackedValue, offset, lowerPoint, offset) < 0) {
return Relation.CELL_OUTSIDE_QUERY;
}
crosses |=
Arrays.compareUnsigned(
minPackedValue,
offset,
offset + bytesPerDim,
lowerPoint,
offset,
offset + bytesPerDim)
< 0
|| Arrays.compareUnsigned(
maxPackedValue,
offset,
offset + bytesPerDim,
upperPoint,
offset,
offset + bytesPerDim)
> 0;
comparator.compare(minPackedValue, offset, lowerPoint, offset) < 0
|| comparator.compare(maxPackedValue, offset, upperPoint, offset) > 0;
}
if (crosses) {
@ -322,22 +283,8 @@ public abstract class PointRangeQuery extends Query {
allDocsMatch = true;
for (int i = 0; i < numDims; ++i) {
int offset = i * bytesPerDim;
if (Arrays.compareUnsigned(
lowerPoint,
offset,
offset + bytesPerDim,
fieldPackedLower,
offset,
offset + bytesPerDim)
> 0
|| Arrays.compareUnsigned(
upperPoint,
offset,
offset + bytesPerDim,
fieldPackedUpper,
offset,
offset + bytesPerDim)
< 0) {
if (comparator.compare(lowerPoint, offset, fieldPackedLower, offset) > 0
|| comparator.compare(upperPoint, offset, fieldPackedUpper, offset) < 0) {
allDocsMatch = false;
break;
}

View File

@ -18,7 +18,6 @@
package org.apache.lucene.search.comparators;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
@ -29,6 +28,8 @@ import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
import org.apache.lucene.util.DocIdSetBuilder;
/**
@ -40,6 +41,7 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
protected final String field;
protected final boolean reverse;
private final int bytesCount; // how many bytes are used to encode this number
private final ByteArrayComparator bytesComparator;
protected boolean topValueSet;
protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
@ -55,6 +57,7 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
// skipping functionality is only relevant for primary sort
this.canSkipDocuments = (sortPos == 0);
this.bytesCount = bytesCount;
this.bytesComparator = ArrayUtil.getUnsignedComparator(bytesCount);
}
@Override
@ -209,17 +212,13 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
return; // already visited or skipped
}
if (maxValueAsBytes != null) {
int cmp =
Arrays.compareUnsigned(
packedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount);
int cmp = bytesComparator.compare(packedValue, 0, maxValueAsBytes, 0);
// if doc's value is too high or for single sort even equal, it is not competitive
// and the doc can be skipped
if (cmp > 0 || (singleSort && cmp == 0)) return;
}
if (minValueAsBytes != null) {
int cmp =
Arrays.compareUnsigned(
packedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount);
int cmp = bytesComparator.compare(packedValue, 0, minValueAsBytes, 0);
// if doc's value is too low or for single sort even equal, it is not competitive
// and the doc can be skipped
if (cmp < 0 || (singleSort && cmp == 0)) return;
@ -230,27 +229,19 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
@Override
public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
if (maxValueAsBytes != null) {
int cmp =
Arrays.compareUnsigned(
minPackedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount);
int cmp = bytesComparator.compare(minPackedValue, 0, maxValueAsBytes, 0);
if (cmp > 0 || (singleSort && cmp == 0))
return PointValues.Relation.CELL_OUTSIDE_QUERY;
}
if (minValueAsBytes != null) {
int cmp =
Arrays.compareUnsigned(
maxPackedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount);
int cmp = bytesComparator.compare(maxPackedValue, 0, minValueAsBytes, 0);
if (cmp < 0 || (singleSort && cmp == 0))
return PointValues.Relation.CELL_OUTSIDE_QUERY;
}
if ((maxValueAsBytes != null
&& Arrays.compareUnsigned(
maxPackedValue, 0, bytesCount, maxValueAsBytes, 0, bytesCount)
> 0)
&& bytesComparator.compare(maxPackedValue, 0, maxValueAsBytes, 0) > 0)
|| (minValueAsBytes != null
&& Arrays.compareUnsigned(
minPackedValue, 0, bytesCount, minValueAsBytes, 0, bytesCount)
< 0)) {
&& bytesComparator.compare(minPackedValue, 0, minValueAsBytes, 0) < 0)) {
return PointValues.Relation.CELL_CROSSES_QUERY;
}
return PointValues.Relation.CELL_INSIDE_QUERY;

View File

@ -17,6 +17,7 @@
package org.apache.lucene.util;
import java.lang.reflect.Array;
import java.util.Arrays;
import java.util.Comparator;
/**
@ -656,4 +657,41 @@ public final class ArrayUtil {
System.arraycopy(array, from, copy, 0, subLength);
return copy;
}
/** Comparator for a fixed number of bytes. */
@FunctionalInterface
public static interface ByteArrayComparator {
/**
* Compare bytes starting from the given offsets. The return value has the same contract as
* {@link Comparator#compare(Object, Object)}.
*/
int compare(byte[] a, int aI, byte[] b, int bI);
}
/** Return a comparator for exactly the specified number of bytes. */
public static ByteArrayComparator getUnsignedComparator(int numBytes) {
if (numBytes == Long.BYTES) {
// Used by LongPoint, DoublePoint
return ArrayUtil::compareUnsigned8;
} else if (numBytes == Integer.BYTES) {
// Used by IntPoint, FloatPoint, LatLonPoint, LatLonShape
return ArrayUtil::compareUnsigned4;
} else {
return (a, aOffset, b, bOffset) ->
Arrays.compareUnsigned(a, aOffset, aOffset + numBytes, b, bOffset, bOffset + numBytes);
}
}
/** Compare exactly 8 unsigned bytes from the provided arrays. */
public static int compareUnsigned8(byte[] a, int aOffset, byte[] b, int bOffset) {
return Long.compareUnsigned(
(long) BitUtil.VH_BE_LONG.get(a, aOffset), (long) BitUtil.VH_BE_LONG.get(b, bOffset));
}
/** Compare exactly 4 unsigned bytes from the provided arrays. */
public static int compareUnsigned4(byte[] a, int aOffset, byte[] b, int bOffset) {
return Integer.compareUnsigned(
(int) BitUtil.VH_BE_INT.get(a, aOffset), (int) BitUtil.VH_BE_INT.get(b, bOffset));
}
}

View File

@ -36,6 +36,7 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet;
@ -92,6 +93,8 @@ public class BKDWriter implements Closeable {
/** BKD tree configuration */
protected final BKDConfig config;
private final ByteArrayComparator comparator;
final TrackingDirectoryWrapper tempDir;
final String tempFileNamePrefix;
final double maxMBSortInHeap;
@ -142,6 +145,7 @@ public class BKDWriter implements Closeable {
this.maxDoc = maxDoc;
this.config = config;
this.comparator = ArrayUtil.getUnsignedComparator(config.bytesPerDim);
docsSeen = new FixedBitSet(maxDoc);
@ -217,23 +221,9 @@ public class BKDWriter implements Closeable {
} else {
for (int dim = 0; dim < config.numIndexDims; dim++) {
int offset = dim * config.bytesPerDim;
if (Arrays.compareUnsigned(
packedValue,
offset,
offset + config.bytesPerDim,
minPackedValue,
offset,
offset + config.bytesPerDim)
< 0) {
if (comparator.compare(packedValue, offset, minPackedValue, offset) < 0) {
System.arraycopy(packedValue, offset, minPackedValue, offset, config.bytesPerDim);
} else if (Arrays.compareUnsigned(
packedValue,
offset,
offset + config.bytesPerDim,
maxPackedValue,
offset,
offset + config.bytesPerDim)
> 0) {
} else if (comparator.compare(packedValue, offset, maxPackedValue, offset) > 0) {
System.arraycopy(packedValue, offset, maxPackedValue, offset, config.bytesPerDim);
}
}
@ -345,11 +335,11 @@ public class BKDWriter implements Closeable {
}
private static class BKDMergeQueue extends PriorityQueue<MergeReader> {
private final int bytesPerDim;
private final ByteArrayComparator comparator;
public BKDMergeQueue(int bytesPerDim, int maxSize) {
super(maxSize);
this.bytesPerDim = bytesPerDim;
this.comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
}
@Override
@ -357,13 +347,7 @@ public class BKDWriter implements Closeable {
assert a != b;
int cmp =
Arrays.compareUnsigned(
a.state.scratchDataPackedValue,
0,
bytesPerDim,
b.state.scratchDataPackedValue,
0,
bytesPerDim);
comparator.compare(a.state.scratchDataPackedValue, 0, b.state.scratchDataPackedValue, 0);
if (cmp < 0) {
return true;
} else if (cmp > 0) {
@ -436,14 +420,8 @@ public class BKDWriter implements Closeable {
values.getValue(i, scratch);
for (int dim = 0; dim < config.numIndexDims; dim++) {
final int startOffset = dim * config.bytesPerDim;
final int endOffset = startOffset + config.bytesPerDim;
if (Arrays.compareUnsigned(
scratch.bytes,
scratch.offset + startOffset,
scratch.offset + endOffset,
minPackedValue,
startOffset,
endOffset)
if (comparator.compare(
scratch.bytes, scratch.offset + startOffset, minPackedValue, startOffset)
< 0) {
System.arraycopy(
scratch.bytes,
@ -451,13 +429,8 @@ public class BKDWriter implements Closeable {
minPackedValue,
startOffset,
config.bytesPerDim);
} else if (Arrays.compareUnsigned(
scratch.bytes,
scratch.offset + startOffset,
scratch.offset + endOffset,
maxPackedValue,
startOffset,
endOffset)
} else if (comparator.compare(
scratch.bytes, scratch.offset + startOffset, maxPackedValue, startOffset)
> 0) {
System.arraycopy(
scratch.bytes,
@ -1439,24 +1412,12 @@ public class BKDWriter implements Closeable {
BytesRef first = packedValues.apply(0);
min.copyBytes(first.bytes, first.offset + offset, length);
max.copyBytes(first.bytes, first.offset + offset, length);
final ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(length);
for (int i = 1; i < count; ++i) {
BytesRef candidate = packedValues.apply(i);
if (Arrays.compareUnsigned(
min.bytes(),
0,
length,
candidate.bytes,
candidate.offset + offset,
candidate.offset + offset + length)
> 0) {
if (comparator.compare(min.bytes(), 0, candidate.bytes, candidate.offset + offset) > 0) {
min.copyBytes(candidate.bytes, candidate.offset + offset, length);
} else if (Arrays.compareUnsigned(
max.bytes(),
0,
length,
candidate.bytes,
candidate.offset + offset,
candidate.offset + offset + length)
} else if (comparator.compare(max.bytes(), 0, candidate.bytes, candidate.offset + offset)
< 0) {
max.copyBytes(candidate.bytes, candidate.offset + offset, length);
}
@ -1565,14 +1526,7 @@ public class BKDWriter implements Closeable {
for (int dim = 0; dim < config.numIndexDims; ++dim) {
final int offset = dim * config.bytesPerDim;
if (parentSplits[dim] < maxNumSplits / 2
&& Arrays.compareUnsigned(
minPackedValue,
offset,
offset + config.bytesPerDim,
maxPackedValue,
offset,
offset + config.bytesPerDim)
!= 0) {
&& comparator.compare(minPackedValue, offset, maxPackedValue, offset) != 0) {
return dim;
}
}
@ -1581,10 +1535,7 @@ public class BKDWriter implements Closeable {
int splitDim = -1;
for (int dim = 0; dim < config.numIndexDims; dim++) {
NumericUtils.subtract(config.bytesPerDim, dim, maxPackedValue, minPackedValue, scratchDiff);
if (splitDim == -1
|| Arrays.compareUnsigned(
scratchDiff, 0, config.bytesPerDim, scratch1, 0, config.bytesPerDim)
> 0) {
if (splitDim == -1 || comparator.compare(scratchDiff, 0, scratch1, 0) > 0) {
System.arraycopy(scratchDiff, 0, scratch1, 0, config.bytesPerDim);
splitDim = dim;
}
@ -1878,14 +1829,8 @@ public class BKDWriter implements Closeable {
value = reader.pointValue().packedValue();
for (int dim = 0; dim < config.numIndexDims; dim++) {
final int startOffset = dim * config.bytesPerDim;
final int endOffset = startOffset + config.bytesPerDim;
if (Arrays.compareUnsigned(
value.bytes,
value.offset + startOffset,
value.offset + endOffset,
minPackedValue,
startOffset,
endOffset)
if (comparator.compare(
value.bytes, value.offset + startOffset, minPackedValue, startOffset)
< 0) {
System.arraycopy(
value.bytes,
@ -1893,13 +1838,8 @@ public class BKDWriter implements Closeable {
minPackedValue,
startOffset,
config.bytesPerDim);
} else if (Arrays.compareUnsigned(
value.bytes,
value.offset + startOffset,
value.offset + endOffset,
maxPackedValue,
startOffset,
endOffset)
} else if (comparator.compare(
value.bytes, value.offset + startOffset, maxPackedValue, startOffset)
> 0) {
System.arraycopy(
value.bytes,

View File

@ -431,4 +431,50 @@ public class TestArrayUtil extends LuceneTestCase {
assertEquals(0, copyOfSubArray(objectArray, 1, 1).length);
expectThrows(IndexOutOfBoundsException.class, () -> copyOfSubArray(objectArray, 2, 5));
}
public void testCompareUnsigned4() {
int aI = TestUtil.nextInt(random(), 0, 3);
byte[] a = new byte[Integer.BYTES + aI];
int bI = TestUtil.nextInt(random(), 0, 3);
byte[] b = new byte[Integer.BYTES + bI];
for (int i = 0; i < Integer.BYTES; ++i) {
a[aI + i] = (byte) random().nextInt(1 << 8);
do {
b[bI + i] = (byte) random().nextInt(1 << 8);
} while (b[bI + i] == a[aI + i]);
}
for (int i = 0; i < Integer.BYTES; ++i) {
int expected = Arrays.compareUnsigned(a, aI, aI + Integer.BYTES, b, bI, bI + Integer.BYTES);
int actual = ArrayUtil.compareUnsigned4(a, aI, b, bI);
assertEquals(Integer.signum(expected), Integer.signum(actual));
b[bI + i] = a[aI + i];
}
assertEquals(0, ArrayUtil.compareUnsigned4(a, aI, b, bI));
}
public void testCompareUnsigned8() {
int aI = TestUtil.nextInt(random(), 0, 7);
byte[] a = new byte[Long.BYTES + aI];
int bI = TestUtil.nextInt(random(), 0, 3);
byte[] b = new byte[Long.BYTES + bI];
for (int i = 0; i < Long.BYTES; ++i) {
a[aI + i] = (byte) random().nextInt(1 << 8);
do {
b[bI + i] = (byte) random().nextInt(1 << 8);
} while (b[bI + i] == a[aI + i]);
}
for (int i = 0; i < Long.BYTES; ++i) {
int expected = Arrays.compareUnsigned(a, aI, aI + Long.BYTES, b, bI, bI + Long.BYTES);
int actual = ArrayUtil.compareUnsigned8(a, aI, b, bI);
assertEquals(Integer.signum(expected), Integer.signum(actual));
b[bI + i] = a[aI + i];
}
assertEquals(0, ArrayUtil.compareUnsigned8(a, aI, b, bI));
}
}