LUCENE-10678: Fix potential overflow when computing the partition point on the BKD tree (#1065)

We currently compute the partition point for a set of points by multiplying the number of leaf nodes that need to be on
the left of the BKD tree by maxPointsInLeafNode. This multiplication is done in int arithmetic, so if the partition point is bigger than Integer.MAX_VALUE it overflows. This commit performs the multiplication in long arithmetic so that it cannot overflow.
This commit is contained in:
Ignacio Vera 2022-08-11 15:25:53 +02:00 committed by GitHub
parent a693fe819b
commit fe8d11254a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 8 deletions

View File

@ -116,6 +116,9 @@ Bug Fixes
* LUCENE-10673: Improve check of equality for latitudes for spatial3d GeoBoundingBox (Ignacio Vera)
* LUCENE-10678: Fix potential overflow when building a BKD tree with more than 4 billion points. The overflow
occurs when computing the partition point. (Ignacio Vera)
Build
---------------------

View File

@ -2018,7 +2018,7 @@ public class BKDWriter implements Closeable {
// How many leaves will be in the left tree:
final int numLeftLeafNodes = getNumLeftLeafNodes(numLeaves);
// How many points will be in the left tree:
final long leftCount = numLeftLeafNodes * config.maxPointsInLeafNode;
final long leftCount = numLeftLeafNodes * (long) config.maxPointsInLeafNode;
BKDRadixSelector.PathSlice[] slices = new BKDRadixSelector.PathSlice[2];

View File

@ -28,19 +28,19 @@ import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.LuceneTestCase.Monster;
import org.apache.lucene.util.NumericUtils;
// e.g. run like this: ant test -Dtestcase=Test2BBKDPoints -Dtests.nightly=true -Dtests.verbose=true
// e.g. run like this: ant test -Dtestcase=Test4BBKDPoints -Dtests.nightly=true -Dtests.verbose=true
// -Dtests.monster=true
//
// or: python -u /l/util/src/python/repeatLuceneTest.py -heap 4g -once -nolog -tmpDir /b/tmp
// -logDir /l/logs Test2BBKDPoints.test2D -verbose
// -logDir /l/logs Test4BBKDPoints.test2D -verbose
@TimeoutSuite(millis = Integer.MAX_VALUE) // hopefully ~24 days is long enough ;)
@Monster("takes at least 4 hours and consumes many GB of temp disk space")
public class Test2BBKDPoints extends LuceneTestCase {
public class Test4BBKDPoints extends LuceneTestCase {
public void test1D() throws Exception {
Directory dir = FSDirectory.open(createTempDir("2BBKDPoints1D"));
Directory dir = FSDirectory.open(createTempDir("4BBKDPoints1D"));
final int numDocs = (Integer.MAX_VALUE / 26) + 100;
final int numDocs = (Integer.MAX_VALUE / 13) + 100;
BKDWriter w =
new BKDWriter(
@ -83,9 +83,9 @@ public class Test2BBKDPoints extends LuceneTestCase {
}
public void test2D() throws Exception {
Directory dir = FSDirectory.open(createTempDir("2BBKDPoints2D"));
Directory dir = FSDirectory.open(createTempDir("4BBKDPoints2D"));
final int numDocs = (Integer.MAX_VALUE / 26) + 100;
final int numDocs = (Integer.MAX_VALUE / 13) + 100;
BKDWriter w =
new BKDWriter(