mirror of https://github.com/apache/lucene.git
LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match (#437)
This commit is contained in:
parent
46a5a57724
commit
f48a430f35
|
@ -74,7 +74,8 @@ Optimizations
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
---------------------
|
---------------------
|
||||||
(No changes)
|
|
||||||
|
* LUCENE-10232: Fix MultiRangeQuery to confirm all dimensions for a given range match. (Greg Miller)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
---------------------
|
---------------------
|
||||||
|
|
|
@ -149,6 +149,8 @@ public abstract class MultiRangeQuery extends Query {
|
||||||
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
|
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
|
final ArrayUtil.ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
|
||||||
|
|
||||||
// We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
|
// We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
|
||||||
// This is an inverted structure and should be used in the first pass:
|
// This is an inverted structure and should be used in the first pass:
|
||||||
|
|
||||||
|
@ -172,31 +174,21 @@ public abstract class MultiRangeQuery extends Query {
|
||||||
@Override
|
@Override
|
||||||
public void visit(int docID, byte[] packedValue) {
|
public void visit(int docID, byte[] packedValue) {
|
||||||
// If a single OR clause has the value in range, the entire query accepts the value
|
// If a single OR clause has the value in range, the entire query accepts the value
|
||||||
|
continueRange:
|
||||||
for (RangeClause rangeClause : rangeClauses) {
|
for (RangeClause rangeClause : rangeClauses) {
|
||||||
for (int dim = 0; dim < numDims; dim++) {
|
for (int dim = 0; dim < numDims; dim++) {
|
||||||
int offset = dim * bytesPerDim;
|
int offset = dim * bytesPerDim;
|
||||||
if ((Arrays.compareUnsigned(
|
if (comparator.compare(packedValue, offset, rangeClause.lowerValue, offset) < 0) {
|
||||||
packedValue,
|
// Doc value is too low in this dim:
|
||||||
offset,
|
continue continueRange;
|
||||||
offset + bytesPerDim,
|
}
|
||||||
rangeClause.lowerValue,
|
if (comparator.compare(packedValue, offset, rangeClause.upperValue, offset) > 0) {
|
||||||
offset,
|
// Doc value is too high in this dim:
|
||||||
offset + bytesPerDim)
|
continue continueRange;
|
||||||
>= 0)
|
|
||||||
&& (Arrays.compareUnsigned(
|
|
||||||
packedValue,
|
|
||||||
offset,
|
|
||||||
offset + bytesPerDim,
|
|
||||||
rangeClause.upperValue,
|
|
||||||
offset,
|
|
||||||
offset + bytesPerDim)
|
|
||||||
<= 0)) {
|
|
||||||
// Doc is in-bounds. Add and short circuit
|
|
||||||
adder.add(docID);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
// Iterate till we have any OR clauses remaining
|
|
||||||
}
|
}
|
||||||
|
// Doc matched on all dimensions:
|
||||||
|
adder.add(docID);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -211,47 +203,35 @@ public abstract class MultiRangeQuery extends Query {
|
||||||
* as inside and atleast one range sees the point as CROSSES, return CROSSES 3) If none
|
* as inside and atleast one range sees the point as CROSSES, return CROSSES 3) If none
|
||||||
* of the above, return OUTSIDE
|
* of the above, return OUTSIDE
|
||||||
*/
|
*/
|
||||||
|
continueRange:
|
||||||
for (RangeClause rangeClause : rangeClauses) {
|
for (RangeClause rangeClause : rangeClauses) {
|
||||||
|
boolean rangeCrosses = false;
|
||||||
|
|
||||||
for (int dim = 0; dim < numDims; dim++) {
|
for (int dim = 0; dim < numDims; dim++) {
|
||||||
int offset = dim * bytesPerDim;
|
int offset = dim * bytesPerDim;
|
||||||
|
|
||||||
if ((Arrays.compareUnsigned(
|
if (comparator.compare(minPackedValue, offset, rangeClause.upperValue, offset) > 0
|
||||||
minPackedValue,
|
|| comparator.compare(maxPackedValue, offset, rangeClause.lowerValue, offset)
|
||||||
offset,
|
< 0) {
|
||||||
offset + bytesPerDim,
|
continue continueRange;
|
||||||
rangeClause.lowerValue,
|
|
||||||
offset,
|
|
||||||
offset + bytesPerDim)
|
|
||||||
>= 0)
|
|
||||||
&& (Arrays.compareUnsigned(
|
|
||||||
maxPackedValue,
|
|
||||||
offset,
|
|
||||||
offset + bytesPerDim,
|
|
||||||
rangeClause.upperValue,
|
|
||||||
offset,
|
|
||||||
offset + bytesPerDim)
|
|
||||||
<= 0)) {
|
|
||||||
return PointValues.Relation.CELL_INSIDE_QUERY;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
crosses |=
|
rangeCrosses |=
|
||||||
Arrays.compareUnsigned(
|
comparator.compare(minPackedValue, offset, rangeClause.lowerValue, offset) < 0
|
||||||
minPackedValue,
|
|| comparator.compare(
|
||||||
offset,
|
maxPackedValue, offset, rangeClause.upperValue, offset)
|
||||||
offset + bytesPerDim,
|
|
||||||
rangeClause.lowerValue,
|
|
||||||
offset,
|
|
||||||
offset + bytesPerDim)
|
|
||||||
< 0
|
|
||||||
|| Arrays.compareUnsigned(
|
|
||||||
maxPackedValue,
|
|
||||||
offset,
|
|
||||||
offset + bytesPerDim,
|
|
||||||
rangeClause.upperValue,
|
|
||||||
offset,
|
|
||||||
offset + bytesPerDim)
|
|
||||||
> 0;
|
> 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rangeCrosses == false) {
|
||||||
|
// At this point we know that the cell is fully inside the range clause, so we
|
||||||
|
// return early:
|
||||||
|
return PointValues.Relation.CELL_INSIDE_QUERY;
|
||||||
|
} else {
|
||||||
|
// This range clause crosses the cell, but we'll keep checking more ranges to see if
|
||||||
|
// one fully contains the cell:
|
||||||
|
crosses = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (crosses) {
|
if (crosses) {
|
||||||
|
@ -300,21 +280,8 @@ public abstract class MultiRangeQuery extends Query {
|
||||||
for (RangeClause rangeClause : rangeClauses) {
|
for (RangeClause rangeClause : rangeClauses) {
|
||||||
for (int i = 0; i < numDims; ++i) {
|
for (int i = 0; i < numDims; ++i) {
|
||||||
int offset = i * bytesPerDim;
|
int offset = i * bytesPerDim;
|
||||||
if (Arrays.compareUnsigned(
|
if (comparator.compare(rangeClause.lowerValue, offset, fieldPackedLower, offset) > 0
|
||||||
rangeClause.lowerValue,
|
|| comparator.compare(rangeClause.upperValue, offset, fieldPackedUpper, offset)
|
||||||
offset,
|
|
||||||
offset + bytesPerDim,
|
|
||||||
fieldPackedLower,
|
|
||||||
offset,
|
|
||||||
offset + bytesPerDim)
|
|
||||||
> 0
|
|
||||||
|| Arrays.compareUnsigned(
|
|
||||||
rangeClause.upperValue,
|
|
||||||
offset,
|
|
||||||
offset + bytesPerDim,
|
|
||||||
fieldPackedUpper,
|
|
||||||
offset,
|
|
||||||
offset + bytesPerDim)
|
|
||||||
< 0) {
|
< 0) {
|
||||||
allDocsMatch = false;
|
allDocsMatch = false;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
|
|
||||||
package org.apache.lucene.sandbox.search;
|
package org.apache.lucene.sandbox.search;
|
||||||
|
|
||||||
|
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.DoublePoint;
|
import org.apache.lucene.document.DoublePoint;
|
||||||
|
@ -29,14 +30,71 @@ import org.apache.lucene.sandbox.document.DoublePointMultiRangeBuilder;
|
||||||
import org.apache.lucene.sandbox.document.FloatPointMultiRangeBuilder;
|
import org.apache.lucene.sandbox.document.FloatPointMultiRangeBuilder;
|
||||||
import org.apache.lucene.sandbox.document.IntPointMultiRangeBuilder;
|
import org.apache.lucene.sandbox.document.IntPointMultiRangeBuilder;
|
||||||
import org.apache.lucene.sandbox.document.LongPointMultiRangeBuilder;
|
import org.apache.lucene.sandbox.document.LongPointMultiRangeBuilder;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.Sort;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
public class TestMultiRangeQueries extends LuceneTestCase {
|
public class TestMultiRangeQueries extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testDuelWithStandardDisjunction() throws IOException {
|
||||||
|
int iterations = LuceneTestCase.TEST_NIGHTLY ? atLeast(100) : 10;
|
||||||
|
for (int iter = 0; iter < iterations; iter++) {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||||
|
|
||||||
|
int dims = RandomNumbers.randomIntBetween(random(), 1, 8);
|
||||||
|
|
||||||
|
long[] scratch = new long[dims];
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
int numPoints = RandomNumbers.randomIntBetween(random(), 1, 10);
|
||||||
|
Document doc = new Document();
|
||||||
|
for (int j = 0; j < numPoints; j++) {
|
||||||
|
for (int v = 0; v < dims; v++) {
|
||||||
|
scratch[v] = RandomNumbers.randomLongBetween(random(), 0, 100);
|
||||||
|
}
|
||||||
|
doc.add(new LongPoint("point", scratch));
|
||||||
|
}
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexReader reader = w.getReader();
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
|
||||||
|
int numRanges = RandomNumbers.randomIntBetween(random(), 1, 20);
|
||||||
|
LongPointMultiRangeBuilder builder1 = new LongPointMultiRangeBuilder("point", dims);
|
||||||
|
BooleanQuery.Builder builder2 = new BooleanQuery.Builder();
|
||||||
|
for (int i = 0; i < numRanges; i++) {
|
||||||
|
long[] lower = new long[dims];
|
||||||
|
long[] upper = new long[dims];
|
||||||
|
for (int j = 0; j < dims; j++) {
|
||||||
|
lower[j] = RandomNumbers.randomLongBetween(random(), -100, 200);
|
||||||
|
upper[j] = lower[j] + RandomNumbers.randomLongBetween(random(), 0, 100);
|
||||||
|
}
|
||||||
|
builder1.add(lower, upper);
|
||||||
|
builder2.add(LongPoint.newRangeQuery("point", lower, upper), BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
|
||||||
|
Query query1 = builder1.build();
|
||||||
|
Query query2 = builder2.build();
|
||||||
|
TopDocs result1 = searcher.search(query1, 10, Sort.INDEXORDER);
|
||||||
|
TopDocs result2 = searcher.search(query2, 10, Sort.INDEXORDER);
|
||||||
|
assertEquals(result2.totalHits, result1.totalHits);
|
||||||
|
assertEquals(result2.scoreDocs.length, result1.scoreDocs.length);
|
||||||
|
for (int i = 0; i < result2.scoreDocs.length; i++) {
|
||||||
|
assertEquals(result2.scoreDocs[i].doc, result1.scoreDocs[i].doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
IOUtils.close(reader, w, dir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testDoubleRandomMultiRangeQuery() throws IOException {
|
public void testDoubleRandomMultiRangeQuery() throws IOException {
|
||||||
final int numDims = TestUtil.nextInt(random(), 1, 3);
|
final int numDims = TestUtil.nextInt(random(), 1, 3);
|
||||||
final int numVals = TestUtil.nextInt(random(), 3, 8);
|
final int numVals = TestUtil.nextInt(random(), 3, 8);
|
||||||
|
@ -123,9 +181,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
|
||||||
|
|
||||||
assertEquals(searcher.count(query), 2);
|
assertEquals(searcher.count(query), 2);
|
||||||
|
|
||||||
// None match
|
// None match (one dimension is in range but the rest aren't)
|
||||||
double[] nonMatchingFirstRangeLower = {1.3, 3.5, 2.7};
|
double[] nonMatchingFirstRangeLower = {111.3, 3.5, 2.7};
|
||||||
double[] nonMatchingFirstRangeUpper = {5.2, 8.3, 7.8};
|
double[] nonMatchingFirstRangeUpper = {117.3, 8.3, 7.8};
|
||||||
|
|
||||||
double[] nonMatchingSecondRangeLower = {11246.3, 19388.7, 21248.4};
|
double[] nonMatchingSecondRangeLower = {11246.3, 19388.7, 21248.4};
|
||||||
double[] nonMatchingSecondRangeUpper = {13242.9, 20214.2, 23236.5};
|
double[] nonMatchingSecondRangeUpper = {13242.9, 20214.2, 23236.5};
|
||||||
|
@ -244,9 +302,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
|
||||||
|
|
||||||
assertEquals(searcher.count(query), 2);
|
assertEquals(searcher.count(query), 2);
|
||||||
|
|
||||||
// None match
|
// None match (one dimension is in range but the rest aren't)
|
||||||
long[] nonMatchingFirstRangeLower = {1, 3, 2};
|
long[] nonMatchingFirstRangeLower = {111, 3, 2};
|
||||||
long[] nonMatchingFirstRangeUpper = {5, 8, 7};
|
long[] nonMatchingFirstRangeUpper = {117, 8, 7};
|
||||||
|
|
||||||
long[] nonMatchingSecondRangeLower = {11246, 19388, 21248};
|
long[] nonMatchingSecondRangeLower = {11246, 19388, 21248};
|
||||||
long[] nonMatchingSecondRangeUpper = {13242, 20214, 23236};
|
long[] nonMatchingSecondRangeUpper = {13242, 20214, 23236};
|
||||||
|
@ -365,9 +423,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
|
||||||
|
|
||||||
assertEquals(searcher.count(query), 2);
|
assertEquals(searcher.count(query), 2);
|
||||||
|
|
||||||
// None Match
|
// None match (one dimension is in range but the rest aren't)
|
||||||
float[] nonMatchingFirstRangeLower = {1.4f, 3.3f, 2.7f};
|
float[] nonMatchingFirstRangeLower = {111.3f, 3.3f, 2.7f};
|
||||||
float[] nonMatchingFirstRangeUpper = {5.4f, 8.2f, 7.3f};
|
float[] nonMatchingFirstRangeUpper = {117.3f, 8.2f, 7.3f};
|
||||||
|
|
||||||
float[] nonMatchingSecondRangeLower = {11246.2f, 19388.6f, 21248.3f};
|
float[] nonMatchingSecondRangeLower = {11246.2f, 19388.6f, 21248.3f};
|
||||||
float[] nonMatchingSecondRangeUpper = {13242.4f, 20214.7f, 23236.3f};
|
float[] nonMatchingSecondRangeUpper = {13242.4f, 20214.7f, 23236.3f};
|
||||||
|
@ -486,9 +544,9 @@ public class TestMultiRangeQueries extends LuceneTestCase {
|
||||||
|
|
||||||
assertEquals(searcher.count(query), 2);
|
assertEquals(searcher.count(query), 2);
|
||||||
|
|
||||||
// None match
|
// None match (one dimension is in range but the rest aren't)
|
||||||
int[] nonMatchingFirstRangeLower = {1, 3, 2};
|
int[] nonMatchingFirstRangeLower = {111, 3, 2};
|
||||||
int[] nonMatchingFirstRangeUpper = {5, 8, 7};
|
int[] nonMatchingFirstRangeUpper = {117, 8, 7};
|
||||||
|
|
||||||
int[] nonMatchingSecondRangeLower = {11246, 19388, 21248};
|
int[] nonMatchingSecondRangeLower = {11246, 19388, 21248};
|
||||||
int[] nonMatchingSecondRangeUpper = {13242, 20214, 23236};
|
int[] nonMatchingSecondRangeUpper = {13242, 20214, 23236};
|
||||||
|
|
Loading…
Reference in New Issue