mirror of https://github.com/apache/lucene.git
LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match (#437)
This commit is contained in:
parent
46a5a57724
commit
f48a430f35
|
@ -74,7 +74,8 @@ Optimizations
|
|||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
(No changes)
|
||||
|
||||
* LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match. (Greg Miller)
|
||||
|
||||
Other
|
||||
---------------------
|
||||
|
|
|
@ -149,6 +149,8 @@ public abstract class MultiRangeQuery extends Query {
|
|||
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
|
||||
throws IOException {
|
||||
|
||||
final ArrayUtil.ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
|
||||
|
||||
// We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
|
||||
// This is an inverted structure and should be used in the first pass:
|
||||
|
||||
|
@ -172,31 +174,21 @@ public abstract class MultiRangeQuery extends Query {
|
|||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
// If a single OR clause has the value in range, the entire query accepts the value
|
||||
continueRange:
|
||||
for (RangeClause rangeClause : rangeClauses) {
|
||||
for (int dim = 0; dim < numDims; dim++) {
|
||||
int offset = dim * bytesPerDim;
|
||||
if ((Arrays.compareUnsigned(
|
||||
packedValue,
|
||||
offset,
|
||||
offset + bytesPerDim,
|
||||
rangeClause.lowerValue,
|
||||
offset,
|
||||
offset + bytesPerDim)
|
||||
>= 0)
|
||||
&& (Arrays.compareUnsigned(
|
||||
packedValue,
|
||||
offset,
|
||||
offset + bytesPerDim,
|
||||
rangeClause.upperValue,
|
||||
offset,
|
||||
offset + bytesPerDim)
|
||||
<= 0)) {
|
||||
// Doc is in-bounds. Add and short circuit
|
||||
adder.add(docID);
|
||||
return;
|
||||
if (comparator.compare(packedValue, offset, rangeClause.lowerValue, offset) < 0) {
|
||||
// Doc value is too low in this dim:
|
||||
continue continueRange;
|
||||
}
|
||||
if (comparator.compare(packedValue, offset, rangeClause.upperValue, offset) > 0) {
|
||||
// Doc value is too high in this dim:
|
||||
continue continueRange;
|
||||
}
|
||||
// Iterate till we have any OR clauses remaining
|
||||
}
|
||||
// Doc matched on all dimensions:
|
||||
adder.add(docID);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -211,47 +203,35 @@ public abstract class MultiRangeQuery extends Query {
|
|||
* as inside and atleast one range sees the point as CROSSES, return CROSSES 3) If none
|
||||
* of the above, return OUTSIDE
|
||||
*/
|
||||
continueRange:
|
||||
for (RangeClause rangeClause : rangeClauses) {
|
||||
boolean rangeCrosses = false;
|
||||
|
||||
for (int dim = 0; dim < numDims; dim++) {
|
||||
int offset = dim * bytesPerDim;
|
||||
|
||||
if ((Arrays.compareUnsigned(
|
||||
minPackedValue,
|
||||
offset,
|
||||
offset + bytesPerDim,
|
||||
rangeClause.lowerValue,
|
||||
offset,
|
||||
offset + bytesPerDim)
|
||||
>= 0)
|
||||
&& (Arrays.compareUnsigned(
|
||||
maxPackedValue,
|
||||
offset,
|
||||
offset + bytesPerDim,
|
||||
rangeClause.upperValue,
|
||||
offset,
|
||||
offset + bytesPerDim)
|
||||
<= 0)) {
|
||||
return PointValues.Relation.CELL_INSIDE_QUERY;
|
||||
if (comparator.compare(minPackedValue, offset, rangeClause.upperValue, offset) > 0
|
||||
|| comparator.compare(maxPackedValue, offset, rangeClause.lowerValue, offset)
|
||||
< 0) {
|
||||
continue continueRange;
|
||||
}
|
||||
|
||||
crosses |=
|
||||
Arrays.compareUnsigned(
|
||||
minPackedValue,
|
||||
offset,
|
||||
offset + bytesPerDim,
|
||||
rangeClause.lowerValue,
|
||||
offset,
|
||||
offset + bytesPerDim)
|
||||
< 0
|
||||
|| Arrays.compareUnsigned(
|
||||
maxPackedValue,
|
||||
offset,
|
||||
offset + bytesPerDim,
|
||||
rangeClause.upperValue,
|
||||
offset,
|
||||
offset + bytesPerDim)
|
||||
rangeCrosses |=
|
||||
comparator.compare(minPackedValue, offset, rangeClause.lowerValue, offset) < 0
|
||||
|| comparator.compare(
|
||||
maxPackedValue, offset, rangeClause.upperValue, offset)
|
||||
> 0;
|
||||
}
|
||||
|
||||
if (rangeCrosses == false) {
|
||||
// At this point we know that the cell is fully inside the range clause, so we
|
||||
// return early:
|
||||
return PointValues.Relation.CELL_INSIDE_QUERY;
|
||||
} else {
|
||||
// This range clause crosses the cell, but we'll keep checking more ranges to see if
|
||||
// one fully contains the cell:
|
||||
crosses = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (crosses) {
|
||||
|
@ -300,21 +280,8 @@ public abstract class MultiRangeQuery extends Query {
|
|||
for (RangeClause rangeClause : rangeClauses) {
|
||||
for (int i = 0; i < numDims; ++i) {
|
||||
int offset = i * bytesPerDim;
|
||||
if (Arrays.compareUnsigned(
|
||||
rangeClause.lowerValue,
|
||||
offset,
|
||||
offset + bytesPerDim,
|
||||
fieldPackedLower,
|
||||
offset,
|
||||
offset + bytesPerDim)
|
||||
> 0
|
||||
|| Arrays.compareUnsigned(
|
||||
rangeClause.upperValue,
|
||||
offset,
|
||||
offset + bytesPerDim,
|
||||
fieldPackedUpper,
|
||||
offset,
|
||||
offset + bytesPerDim)
|
||||
if (comparator.compare(rangeClause.lowerValue, offset, fieldPackedLower, offset) > 0
|
||||
|| comparator.compare(rangeClause.upperValue, offset, fieldPackedUpper, offset)
|
||||
< 0) {
|
||||
allDocsMatch = false;
|
||||
break;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
package org.apache.lucene.sandbox.search;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.DoublePoint;
|
||||
|
@ -29,14 +30,71 @@ import org.apache.lucene.sandbox.document.DoublePointMultiRangeBuilder;
|
|||
import org.apache.lucene.sandbox.document.FloatPointMultiRangeBuilder;
|
||||
import org.apache.lucene.sandbox.document.IntPointMultiRangeBuilder;
|
||||
import org.apache.lucene.sandbox.document.LongPointMultiRangeBuilder;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestMultiRangeQueries extends LuceneTestCase {
|
||||
|
||||
public void testDuelWithStandardDisjunction() throws IOException {
|
||||
int iterations = LuceneTestCase.TEST_NIGHTLY ? atLeast(100) : 10;
|
||||
for (int iter = 0; iter < iterations; iter++) {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
|
||||
int dims = RandomNumbers.randomIntBetween(random(), 1, 8);
|
||||
|
||||
long[] scratch = new long[dims];
|
||||
for (int i = 0; i < 100; i++) {
|
||||
int numPoints = RandomNumbers.randomIntBetween(random(), 1, 10);
|
||||
Document doc = new Document();
|
||||
for (int j = 0; j < numPoints; j++) {
|
||||
for (int v = 0; v < dims; v++) {
|
||||
scratch[v] = RandomNumbers.randomLongBetween(random(), 0, 100);
|
||||
}
|
||||
doc.add(new LongPoint("point", scratch));
|
||||
}
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
IndexReader reader = w.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
int numRanges = RandomNumbers.randomIntBetween(random(), 1, 20);
|
||||
LongPointMultiRangeBuilder builder1 = new LongPointMultiRangeBuilder("point", dims);
|
||||
BooleanQuery.Builder builder2 = new BooleanQuery.Builder();
|
||||
for (int i = 0; i < numRanges; i++) {
|
||||
long[] lower = new long[dims];
|
||||
long[] upper = new long[dims];
|
||||
for (int j = 0; j < dims; j++) {
|
||||
lower[j] = RandomNumbers.randomLongBetween(random(), -100, 200);
|
||||
upper[j] = lower[j] + RandomNumbers.randomLongBetween(random(), 0, 100);
|
||||
}
|
||||
builder1.add(lower, upper);
|
||||
builder2.add(LongPoint.newRangeQuery("point", lower, upper), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
|
||||
Query query1 = builder1.build();
|
||||
Query query2 = builder2.build();
|
||||
TopDocs result1 = searcher.search(query1, 10, Sort.INDEXORDER);
|
||||
TopDocs result2 = searcher.search(query2, 10, Sort.INDEXORDER);
|
||||
assertEquals(result2.totalHits, result1.totalHits);
|
||||
assertEquals(result2.scoreDocs.length, result1.scoreDocs.length);
|
||||
for (int i = 0; i < result2.scoreDocs.length; i++) {
|
||||
assertEquals(result2.scoreDocs[i].doc, result1.scoreDocs[i].doc);
|
||||
}
|
||||
|
||||
IOUtils.close(reader, w, dir);
|
||||
}
|
||||
}
|
||||
|
||||
public void testDoubleRandomMultiRangeQuery() throws IOException {
|
||||
final int numDims = TestUtil.nextInt(random(), 1, 3);
|
||||
final int numVals = TestUtil.nextInt(random(), 3, 8);
|
||||
|
@ -123,9 +181,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
|
|||
|
||||
assertEquals(searcher.count(query), 2);
|
||||
|
||||
// None match
|
||||
double[] nonMatchingFirstRangeLower = {1.3, 3.5, 2.7};
|
||||
double[] nonMatchingFirstRangeUpper = {5.2, 8.3, 7.8};
|
||||
// None match (one dimension is in range but the rest aren't)
|
||||
double[] nonMatchingFirstRangeLower = {111.3, 3.5, 2.7};
|
||||
double[] nonMatchingFirstRangeUpper = {117.3, 8.3, 7.8};
|
||||
|
||||
double[] nonMatchingSecondRangeLower = {11246.3, 19388.7, 21248.4};
|
||||
double[] nonMatchingSecondRangeUpper = {13242.9, 20214.2, 23236.5};
|
||||
|
@ -244,9 +302,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
|
|||
|
||||
assertEquals(searcher.count(query), 2);
|
||||
|
||||
// None match
|
||||
long[] nonMatchingFirstRangeLower = {1, 3, 2};
|
||||
long[] nonMatchingFirstRangeUpper = {5, 8, 7};
|
||||
// None match (one dimension is in range but the rest aren't)
|
||||
long[] nonMatchingFirstRangeLower = {111, 3, 2};
|
||||
long[] nonMatchingFirstRangeUpper = {117, 8, 7};
|
||||
|
||||
long[] nonMatchingSecondRangeLower = {11246, 19388, 21248};
|
||||
long[] nonMatchingSecondRangeUpper = {13242, 20214, 23236};
|
||||
|
@ -365,9 +423,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
|
|||
|
||||
assertEquals(searcher.count(query), 2);
|
||||
|
||||
// None Match
|
||||
float[] nonMatchingFirstRangeLower = {1.4f, 3.3f, 2.7f};
|
||||
float[] nonMatchingFirstRangeUpper = {5.4f, 8.2f, 7.3f};
|
||||
// None match (one dimension is in range but the rest aren't)
|
||||
float[] nonMatchingFirstRangeLower = {111.3f, 3.3f, 2.7f};
|
||||
float[] nonMatchingFirstRangeUpper = {117.3f, 8.2f, 7.3f};
|
||||
|
||||
float[] nonMatchingSecondRangeLower = {11246.2f, 19388.6f, 21248.3f};
|
||||
float[] nonMatchingSecondRangeUpper = {13242.4f, 20214.7f, 23236.3f};
|
||||
|
@ -486,9 +544,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
|
|||
|
||||
assertEquals(searcher.count(query), 2);
|
||||
|
||||
// None match
|
||||
int[] nonMatchingFirstRangeLower = {1, 3, 2};
|
||||
int[] nonMatchingFirstRangeUpper = {5, 8, 7};
|
||||
// None match (one dimension is in range but the rest aren't)
|
||||
int[] nonMatchingFirstRangeLower = {111, 3, 2};
|
||||
int[] nonMatchingFirstRangeUpper = {117, 8, 7};
|
||||
|
||||
int[] nonMatchingSecondRangeLower = {11246, 19388, 21248};
|
||||
int[] nonMatchingSecondRangeUpper = {13242, 20214, 23236};
|
||||
|
|
Loading…
Reference in New Issue