LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match (#437)

This commit is contained in:
Greg Miller 2021-11-30 11:58:38 -08:00 committed by GitHub
parent 46a5a57724
commit f48a430f35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 107 additions and 81 deletions

View File

@ -74,7 +74,8 @@ Optimizations
Bug Fixes Bug Fixes
--------------------- ---------------------
(No changes)
* LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match. (Greg Miller)
Other Other
--------------------- ---------------------

View File

@ -149,6 +149,8 @@ public abstract class MultiRangeQuery extends Query {
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException { throws IOException {
final ArrayUtil.ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
// We don't use RandomAccessWeight here: it's no good to approximate with "match all docs". // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
// This is an inverted structure and should be used in the first pass: // This is an inverted structure and should be used in the first pass:
@ -172,31 +174,21 @@ public abstract class MultiRangeQuery extends Query {
@Override @Override
public void visit(int docID, byte[] packedValue) { public void visit(int docID, byte[] packedValue) {
// If a single OR clause has the value in range, the entire query accepts the value // If a single OR clause has the value in range, the entire query accepts the value
continueRange:
for (RangeClause rangeClause : rangeClauses) { for (RangeClause rangeClause : rangeClauses) {
for (int dim = 0; dim < numDims; dim++) { for (int dim = 0; dim < numDims; dim++) {
int offset = dim * bytesPerDim; int offset = dim * bytesPerDim;
if ((Arrays.compareUnsigned( if (comparator.compare(packedValue, offset, rangeClause.lowerValue, offset) < 0) {
packedValue, // Doc value is too low in this dim:
offset, continue continueRange;
offset + bytesPerDim, }
rangeClause.lowerValue, if (comparator.compare(packedValue, offset, rangeClause.upperValue, offset) > 0) {
offset, // Doc value is too high in this dim:
offset + bytesPerDim) continue continueRange;
>= 0)
&& (Arrays.compareUnsigned(
packedValue,
offset,
offset + bytesPerDim,
rangeClause.upperValue,
offset,
offset + bytesPerDim)
<= 0)) {
// Doc is in-bounds. Add and short circuit
adder.add(docID);
return;
} }
// Iterate till we have any OR clauses remaining
} }
// Doc matched on all dimensions:
adder.add(docID);
} }
} }
@ -211,47 +203,35 @@ public abstract class MultiRangeQuery extends Query {
* as inside and atleast one range sees the point as CROSSES, return CROSSES 3) If none * as inside and atleast one range sees the point as CROSSES, return CROSSES 3) If none
* of the above, return OUTSIDE * of the above, return OUTSIDE
*/ */
continueRange:
for (RangeClause rangeClause : rangeClauses) { for (RangeClause rangeClause : rangeClauses) {
boolean rangeCrosses = false;
for (int dim = 0; dim < numDims; dim++) { for (int dim = 0; dim < numDims; dim++) {
int offset = dim * bytesPerDim; int offset = dim * bytesPerDim;
if ((Arrays.compareUnsigned( if (comparator.compare(minPackedValue, offset, rangeClause.upperValue, offset) > 0
minPackedValue, || comparator.compare(maxPackedValue, offset, rangeClause.lowerValue, offset)
offset, < 0) {
offset + bytesPerDim, continue continueRange;
rangeClause.lowerValue,
offset,
offset + bytesPerDim)
>= 0)
&& (Arrays.compareUnsigned(
maxPackedValue,
offset,
offset + bytesPerDim,
rangeClause.upperValue,
offset,
offset + bytesPerDim)
<= 0)) {
return PointValues.Relation.CELL_INSIDE_QUERY;
} }
crosses |= rangeCrosses |=
Arrays.compareUnsigned( comparator.compare(minPackedValue, offset, rangeClause.lowerValue, offset) < 0
minPackedValue, || comparator.compare(
offset, maxPackedValue, offset, rangeClause.upperValue, offset)
offset + bytesPerDim,
rangeClause.lowerValue,
offset,
offset + bytesPerDim)
< 0
|| Arrays.compareUnsigned(
maxPackedValue,
offset,
offset + bytesPerDim,
rangeClause.upperValue,
offset,
offset + bytesPerDim)
> 0; > 0;
} }
if (rangeCrosses == false) {
// At this point we know that the cell is fully inside the range clause, so we
// return early:
return PointValues.Relation.CELL_INSIDE_QUERY;
} else {
// This range clause crosses the cell, but we'll keep checking more ranges to see if
// one fully contains the cell:
crosses = true;
}
} }
if (crosses) { if (crosses) {
@ -300,21 +280,8 @@ public abstract class MultiRangeQuery extends Query {
for (RangeClause rangeClause : rangeClauses) { for (RangeClause rangeClause : rangeClauses) {
for (int i = 0; i < numDims; ++i) { for (int i = 0; i < numDims; ++i) {
int offset = i * bytesPerDim; int offset = i * bytesPerDim;
if (Arrays.compareUnsigned( if (comparator.compare(rangeClause.lowerValue, offset, fieldPackedLower, offset) > 0
rangeClause.lowerValue, || comparator.compare(rangeClause.upperValue, offset, fieldPackedUpper, offset)
offset,
offset + bytesPerDim,
fieldPackedLower,
offset,
offset + bytesPerDim)
> 0
|| Arrays.compareUnsigned(
rangeClause.upperValue,
offset,
offset + bytesPerDim,
fieldPackedUpper,
offset,
offset + bytesPerDim)
< 0) { < 0) {
allDocsMatch = false; allDocsMatch = false;
break; break;

View File

@ -17,6 +17,7 @@
package org.apache.lucene.sandbox.search; package org.apache.lucene.sandbox.search;
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.DoublePoint;
@ -29,14 +30,71 @@ import org.apache.lucene.sandbox.document.DoublePointMultiRangeBuilder;
import org.apache.lucene.sandbox.document.FloatPointMultiRangeBuilder; import org.apache.lucene.sandbox.document.FloatPointMultiRangeBuilder;
import org.apache.lucene.sandbox.document.IntPointMultiRangeBuilder; import org.apache.lucene.sandbox.document.IntPointMultiRangeBuilder;
import org.apache.lucene.sandbox.document.LongPointMultiRangeBuilder; import org.apache.lucene.sandbox.document.LongPointMultiRangeBuilder;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
public class TestMultiRangeQueries extends LuceneTestCase { public class TestMultiRangeQueries extends LuceneTestCase {
public void testDuelWithStandardDisjunction() throws IOException {
int iterations = LuceneTestCase.TEST_NIGHTLY ? atLeast(100) : 10;
for (int iter = 0; iter < iterations; iter++) {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
int dims = RandomNumbers.randomIntBetween(random(), 1, 8);
long[] scratch = new long[dims];
for (int i = 0; i < 100; i++) {
int numPoints = RandomNumbers.randomIntBetween(random(), 1, 10);
Document doc = new Document();
for (int j = 0; j < numPoints; j++) {
for (int v = 0; v < dims; v++) {
scratch[v] = RandomNumbers.randomLongBetween(random(), 0, 100);
}
doc.add(new LongPoint("point", scratch));
}
w.addDocument(doc);
}
IndexReader reader = w.getReader();
IndexSearcher searcher = newSearcher(reader);
int numRanges = RandomNumbers.randomIntBetween(random(), 1, 20);
LongPointMultiRangeBuilder builder1 = new LongPointMultiRangeBuilder("point", dims);
BooleanQuery.Builder builder2 = new BooleanQuery.Builder();
for (int i = 0; i < numRanges; i++) {
long[] lower = new long[dims];
long[] upper = new long[dims];
for (int j = 0; j < dims; j++) {
lower[j] = RandomNumbers.randomLongBetween(random(), -100, 200);
upper[j] = lower[j] + RandomNumbers.randomLongBetween(random(), 0, 100);
}
builder1.add(lower, upper);
builder2.add(LongPoint.newRangeQuery("point", lower, upper), BooleanClause.Occur.SHOULD);
}
Query query1 = builder1.build();
Query query2 = builder2.build();
TopDocs result1 = searcher.search(query1, 10, Sort.INDEXORDER);
TopDocs result2 = searcher.search(query2, 10, Sort.INDEXORDER);
assertEquals(result2.totalHits, result1.totalHits);
assertEquals(result2.scoreDocs.length, result1.scoreDocs.length);
for (int i = 0; i < result2.scoreDocs.length; i++) {
assertEquals(result2.scoreDocs[i].doc, result1.scoreDocs[i].doc);
}
IOUtils.close(reader, w, dir);
}
}
public void testDoubleRandomMultiRangeQuery() throws IOException { public void testDoubleRandomMultiRangeQuery() throws IOException {
final int numDims = TestUtil.nextInt(random(), 1, 3); final int numDims = TestUtil.nextInt(random(), 1, 3);
final int numVals = TestUtil.nextInt(random(), 3, 8); final int numVals = TestUtil.nextInt(random(), 3, 8);
@ -123,9 +181,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
assertEquals(searcher.count(query), 2); assertEquals(searcher.count(query), 2);
// None match // None match (one dimension is in range but the rest aren't)
double[] nonMatchingFirstRangeLower = {1.3, 3.5, 2.7}; double[] nonMatchingFirstRangeLower = {111.3, 3.5, 2.7};
double[] nonMatchingFirstRangeUpper = {5.2, 8.3, 7.8}; double[] nonMatchingFirstRangeUpper = {117.3, 8.3, 7.8};
double[] nonMatchingSecondRangeLower = {11246.3, 19388.7, 21248.4}; double[] nonMatchingSecondRangeLower = {11246.3, 19388.7, 21248.4};
double[] nonMatchingSecondRangeUpper = {13242.9, 20214.2, 23236.5}; double[] nonMatchingSecondRangeUpper = {13242.9, 20214.2, 23236.5};
@ -244,9 +302,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
assertEquals(searcher.count(query), 2); assertEquals(searcher.count(query), 2);
// None match // None match (one dimension is in range but the rest aren't)
long[] nonMatchingFirstRangeLower = {1, 3, 2}; long[] nonMatchingFirstRangeLower = {111, 3, 2};
long[] nonMatchingFirstRangeUpper = {5, 8, 7}; long[] nonMatchingFirstRangeUpper = {117, 8, 7};
long[] nonMatchingSecondRangeLower = {11246, 19388, 21248}; long[] nonMatchingSecondRangeLower = {11246, 19388, 21248};
long[] nonMatchingSecondRangeUpper = {13242, 20214, 23236}; long[] nonMatchingSecondRangeUpper = {13242, 20214, 23236};
@ -365,9 +423,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
assertEquals(searcher.count(query), 2); assertEquals(searcher.count(query), 2);
// None Match // None match (one dimension is in range but the rest aren't)
float[] nonMatchingFirstRangeLower = {1.4f, 3.3f, 2.7f}; float[] nonMatchingFirstRangeLower = {111.3f, 3.3f, 2.7f};
float[] nonMatchingFirstRangeUpper = {5.4f, 8.2f, 7.3f}; float[] nonMatchingFirstRangeUpper = {117.3f, 8.2f, 7.3f};
float[] nonMatchingSecondRangeLower = {11246.2f, 19388.6f, 21248.3f}; float[] nonMatchingSecondRangeLower = {11246.2f, 19388.6f, 21248.3f};
float[] nonMatchingSecondRangeUpper = {13242.4f, 20214.7f, 23236.3f}; float[] nonMatchingSecondRangeUpper = {13242.4f, 20214.7f, 23236.3f};
@ -486,9 +544,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
assertEquals(searcher.count(query), 2); assertEquals(searcher.count(query), 2);
// None match // None match (one dimension is in range but the rest aren't)
int[] nonMatchingFirstRangeLower = {1, 3, 2}; int[] nonMatchingFirstRangeLower = {111, 3, 2};
int[] nonMatchingFirstRangeUpper = {5, 8, 7}; int[] nonMatchingFirstRangeUpper = {117, 8, 7};
int[] nonMatchingSecondRangeLower = {11246, 19388, 21248}; int[] nonMatchingSecondRangeLower = {11246, 19388, 21248};
int[] nonMatchingSecondRangeUpper = {13242, 20214, 23236}; int[] nonMatchingSecondRangeUpper = {13242, 20214, 23236};