mirror of https://github.com/apache/lucene.git

Leverage doc value skip lists in DocValuesRewriteMethod if indexed (#13672)

parent 4e3945ed54
commit 68882c8b89

@@ -376,6 +376,8 @@ Optimizations

 * GITHUB#13587: Use Max WAND optimizations with ToParentBlockJoinQuery when using ScoreMode.Max (Mike Pellegrini)

+* GITHUB#13672: Leverage doc value skip lists in DocValuesRewriteMethod if indexed. (Greg Miller)
+
 Changes in runtime behavior
 ---------------------
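For context, the optimization only applies when the doc values field was written with a skip list ("skipper") and the multi-term query is rewritten through DocValuesRewriteMethod. Below is a minimal sketch of that combination, using the same SortedSetDocValuesField.indexedField and MultiTermQuery.DOC_VALUES_REWRITE entry points exercised by the tests in this commit; the class name, field name, and terms are illustrative only, not part of the patch.

import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

public class DocValuesSkipListRewriteExample {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      for (String color : new String[] {"azure", "blue", "crimson"}) {
        Document doc = new Document();
        // indexedField(...) stores the doc values together with a skip list ("skipper"),
        // which is the structure DocValuesRewriteMethod can now take advantage of.
        doc.add(SortedSetDocValuesField.indexedField("color", new BytesRef(color)));
        writer.addDocument(doc);
      }
      writer.commit();

      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // DOC_VALUES_REWRITE routes the multi-term query through DocValuesRewriteMethod,
        // where the skipper can rule out whole blocks of documents.
        Query q =
            new TermInSetQuery(
                MultiTermQuery.DOC_VALUES_REWRITE,
                "color",
                List.of(new BytesRef("azure"), new BytesRef("crimson")));
        System.out.println(searcher.count(q));
      }
    }
  }
}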
@@ -28,6 +28,7 @@ import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.DocValuesRangeIterator;
 import org.apache.lucene.search.FieldExistsQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchNoDocsQuery;
@@ -179,7 +180,7 @@ final class SortedNumericDocValuesRangeQuery extends Query {
           };
         }
         if (skipper != null) {
-          iterator = new DocValuesRangeIterator(iterator, skipper, lowerValue, upperValue);
+          iterator = new DocValuesRangeIterator(iterator, skipper, lowerValue, upperValue, false);
         }
         final var scorer = new ConstantScoreScorer(score(), scoreMode, iterator);
         return new DefaultScorerSupplier(scorer);
@@ -28,6 +28,7 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.DocValuesRangeIterator;
 import org.apache.lucene.search.FieldExistsQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -217,7 +218,7 @@ final class SortedSetDocValuesRangeQuery extends Query {
           };
         }
         if (skipper != null) {
-          iterator = new DocValuesRangeIterator(iterator, skipper, minOrd, maxOrd);
+          iterator = new DocValuesRangeIterator(iterator, skipper, minOrd, maxOrd, false);
         }
         return new ConstantScoreScorer(score(), scoreMode, iterator);
       }
@@ -14,18 +14,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.lucene.document;
+package org.apache.lucene.search;

 import java.io.IOException;
 import org.apache.lucene.index.DocValuesSkipper;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.TwoPhaseIterator;

 /**
  * Wrapper around a {@link TwoPhaseIterator} for a doc-values range query that speeds things up by
  * taking advantage of a {@link DocValuesSkipper}.
+ *
+ * @lucene.experimental
  */
-final class DocValuesRangeIterator extends TwoPhaseIterator {
+public final class DocValuesRangeIterator extends TwoPhaseIterator {

   enum Match {
     /** None of the documents in the range match */
@@ -41,19 +41,29 @@ final class DocValuesRangeIterator extends TwoPhaseIterator {
   private final Approximation approximation;
   private final TwoPhaseIterator innerTwoPhase;

-  DocValuesRangeIterator(
-      TwoPhaseIterator twoPhase, DocValuesSkipper skipper, long lowerValue, long upperValue) {
-    super(new Approximation(twoPhase.approximation(), skipper, lowerValue, upperValue));
+  public DocValuesRangeIterator(
+      TwoPhaseIterator twoPhase,
+      DocValuesSkipper skipper,
+      long lowerValue,
+      long upperValue,
+      boolean queryRangeHasGaps) {
+    super(
+        queryRangeHasGaps
+            ? new RangeWithGapsApproximation(
+                twoPhase.approximation(), skipper, lowerValue, upperValue)
+            : new RangeNoGapsApproximation(
+                twoPhase.approximation(), skipper, lowerValue, upperValue));
     this.approximation = (Approximation) approximation();
     this.innerTwoPhase = twoPhase;
   }

-  static class Approximation extends DocIdSetIterator {
+  abstract static class Approximation extends DocIdSetIterator {

     private final DocIdSetIterator innerApproximation;
-    private final DocValuesSkipper skipper;
-    private final long lowerValue;
-    private final long upperValue;
+
+    protected final DocValuesSkipper skipper;
+    protected final long lowerValue;
+    protected final long upperValue;

     private int doc = -1;

@@ -137,7 +147,21 @@ final class DocValuesRangeIterator extends TwoPhaseIterator {
       return innerApproximation.cost();
     }

-    private Match match(int level) {
+    protected abstract Match match(int level);
+  }
+
+  private static final class RangeNoGapsApproximation extends Approximation {
+
+    RangeNoGapsApproximation(
+        DocIdSetIterator innerApproximation,
+        DocValuesSkipper skipper,
+        long lowerValue,
+        long upperValue) {
+      super(innerApproximation, skipper, lowerValue, upperValue);
+    }
+
+    @Override
+    protected Match match(int level) {
       long minValue = skipper.minValue(level);
       long maxValue = skipper.maxValue(level);
       if (minValue > upperValue || maxValue < lowerValue) {
@@ -154,6 +178,28 @@ final class DocValuesRangeIterator extends TwoPhaseIterator {
       }
     }
   }
+
+  private static final class RangeWithGapsApproximation extends Approximation {
+
+    RangeWithGapsApproximation(
+        DocIdSetIterator innerApproximation,
+        DocValuesSkipper skipper,
+        long lowerValue,
+        long upperValue) {
+      super(innerApproximation, skipper, lowerValue, upperValue);
+    }
+
+    @Override
+    protected Match match(int level) {
+      long minValue = skipper.minValue(level);
+      long maxValue = skipper.maxValue(level);
+      if (minValue > upperValue || maxValue < lowerValue) {
+        return Match.NO;
+      } else {
+        return Match.MAYBE;
+      }
+    }
+  }

   @Override
   public final boolean matches() throws IOException {
     return switch (approximation.match) {
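The split into RangeNoGapsApproximation and RangeWithGapsApproximation above is the heart of the change: a block whose [minValue, maxValue] lies entirely inside the query range is a guaranteed match for a contiguous range query, but not for a term set that may have gaps between its smallest and largest ordinal. The following is a simplified, self-contained restatement of the two block-level policies; it is a hypothetical helper, not the actual Lucene class, and the real code additionally distinguishes an IF_DOC_HAS_VALUE case for partially populated blocks.

final class BlockMatchSketch {
  enum Match {
    NO,
    MAYBE,
    YES
  }

  // Contiguous range query [lower, upper]: full containment is a definite match.
  static Match matchNoGaps(long blockMin, long blockMax, long lower, long upper) {
    if (blockMin > upper || blockMax < lower) {
      return Match.NO; // block range and query range are disjoint: skip the block
    } else if (blockMin >= lower && blockMax <= upper) {
      return Match.YES; // every value the block can hold lies inside the query range
    } else {
      return Match.MAYBE; // partial overlap: verify each document
    }
  }

  // Term set whose min/max ordinals may span gaps (e.g. {a, c} with "b" in between):
  // only the disjoint case is conclusive.
  static Match matchWithGaps(long blockMin, long blockMax, long lower, long upper) {
    if (blockMin > upper || blockMax < lower) {
      return Match.NO; // still safe to skip the whole block
    }
    return Match.MAYBE; // a fully contained block may hold values that fall in a gap
  }
}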
@@ -18,6 +18,7 @@ package org.apache.lucene.search;

 import java.io.IOException;
 import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesSkipper;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
@@ -166,27 +167,29 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
           return new ConstantScoreScorer(score(), scoreMode, DocIdSetIterator.empty());
         }

+        // Leverage a DV skipper if one was indexed for the field:
+        DocValuesSkipper skipper = context.reader().getDocValuesSkipper(query.field);
+
         // Create a bit set for the "term set" ordinals (these are the terms provided by the
         // query that are actually present in the doc values field). Cannot use FixedBitSet
         // because we require long index (ord):
         final LongBitSet termSet = new LongBitSet(values.getValueCount());
+        long minOrd = termsEnum.ord();
+        assert minOrd >= 0;
         long maxOrd = -1;
         do {
           long ord = termsEnum.ord();
-          if (ord >= 0) {
-            assert ord > maxOrd;
-            maxOrd = ord;
-            termSet.set(ord);
-          }
+          assert ord >= 0 && ord > maxOrd;
+          maxOrd = ord;
+          termSet.set(ord);
         } while (termsEnum.next() != null);

-        // no terms matched in this segment
-        if (maxOrd < 0) {
+        if (skipper != null && (minOrd > skipper.maxValue() || maxOrd < skipper.minValue())) {
           return new ConstantScoreScorer(score(), scoreMode, DocIdSetIterator.empty());
         }

         final SortedDocValues singleton = DocValues.unwrapSingleton(values);
-        final TwoPhaseIterator iterator;
+        TwoPhaseIterator iterator;
         final long max = maxOrd;
         if (singleton != null) {
           iterator =
@@ -224,6 +227,9 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
               };
         }

+        if (skipper != null) {
+          iterator = new DocValuesRangeIterator(iterator, skipper, minOrd, maxOrd, true);
+        }
         return new ConstantScoreScorer(score(), scoreMode, iterator);
       }
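One more note on the rewrite above: because the filtered terms enum walks ordinals in increasing order, minOrd is the first ordinal it returns and maxOrd the last, so the skipper's segment-wide min/max can rule out the whole segment before any documents are visited. A hedged restatement of that early exit as a standalone helper follows; the names are illustrative, with skipperMin/skipperMax standing in for skipper.minValue()/skipper.maxValue().

final class SegmentSkipSketch {
  // True when no indexed ordinal in the segment can fall inside [minOrd, maxOrd],
  // in which case the weight returns an empty scorer without touching any documents.
  static boolean segmentCannotMatch(long skipperMin, long skipperMax, long minOrd, long maxOrd) {
    return minOrd > skipperMax || maxOrd < skipperMin;
  }
}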
@@ -1,273 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.document;
-
-import java.io.IOException;
-import java.util.concurrent.atomic.AtomicBoolean;
-import org.apache.lucene.index.DocValuesSkipper;
-import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.TwoPhaseIterator;
-import org.apache.lucene.tests.util.LuceneTestCase;
-
-public class TestDocValuesRangeIterator extends LuceneTestCase {
-
-  public void testSingleLevel() throws IOException {
-    doTestBasics(false);
-  }
-
-  public void testMultipleLevels() throws IOException {
-    doTestBasics(true);
-  }
-
-  private void doTestBasics(boolean doLevels) throws IOException {
-    long queryMin = 10;
-    long queryMax = 20;
-
-    // Fake numeric doc values so that:
-    // docs 0-256 all match
-    // docs in 256-512 are all greater than queryMax
-    // docs in 512-768 are all less than queryMin
-    // docs in 768-1024 have some docs that match the range, others not
-    // docs in 1024-2048 follow a similar pattern as docs in 0-1024 except that not all docs have a
-    // value
-    NumericDocValues values =
-        new NumericDocValues() {
-
-          int doc = -1;
-
-          @Override
-          public boolean advanceExact(int target) throws IOException {
-            throw new UnsupportedOperationException();
-          }
-
-          @Override
-          public int docID() {
-            return doc;
-          }
-
-          @Override
-          public int nextDoc() throws IOException {
-            return advance(doc + 1);
-          }
-
-          @Override
-          public int advance(int target) throws IOException {
-            if (target < 1024) {
-              // dense up to 1024
-              return doc = target;
-            } else if (doc < 2047) {
-              // 50% docs have a value up to 2048
-              return doc = target + (target & 1);
-            } else {
-              return doc = DocIdSetIterator.NO_MORE_DOCS;
-            }
-          }
-
-          @Override
-          public long longValue() throws IOException {
-            int d = doc % 1024;
-            if (d < 128) {
-              return (queryMin + queryMax) >> 1;
-            } else if (d < 256) {
-              return queryMax + 1;
-            } else if (d < 512) {
-              return queryMin - 1;
-            } else {
-              return switch ((d / 2) % 3) {
-                case 0 -> queryMin - 1;
-                case 1 -> queryMax + 1;
-                case 2 -> (queryMin + queryMax) >> 1;
-                default -> throw new AssertionError();
-              };
-            }
-          }
-
-          @Override
-          public long cost() {
-            return 42;
-          }
-        };
-
-    AtomicBoolean twoPhaseCalled = new AtomicBoolean();
-    TwoPhaseIterator twoPhase =
-        new TwoPhaseIterator(values) {
-
-          @Override
-          public boolean matches() throws IOException {
-            twoPhaseCalled.set(true);
-            long v = values.longValue();
-            return v >= queryMin && v <= queryMax;
-          }
-
-          @Override
-          public float matchCost() {
-            return 2f; // 2 comparisons
-          }
-        };
-
-    DocValuesSkipper skipper =
-        new DocValuesSkipper() {
-
-          int doc = -1;
-
-          @Override
-          public void advance(int target) throws IOException {
-            doc = target;
-          }
-
-          @Override
-          public int numLevels() {
-            return doLevels ? 3 : 1;
-          }
-
-          @Override
-          public int minDocID(int level) {
-            int rangeLog = 9 - numLevels() + level;
-
-            // the level is the log2 of the interval
-            if (doc < 0) {
-              return -1;
-            } else if (doc >= 2048) {
-              return DocIdSetIterator.NO_MORE_DOCS;
-            } else {
-              int mask = (1 << rangeLog) - 1;
-              // prior multiple of 2^level
-              return doc & ~mask;
-            }
-          }
-
-          @Override
-          public int maxDocID(int level) {
-            int rangeLog = 9 - numLevels() + level;
-
-            int minDocID = minDocID(level);
-            return switch (minDocID) {
-              case -1 -> -1;
-              case DocIdSetIterator.NO_MORE_DOCS -> DocIdSetIterator.NO_MORE_DOCS;
-              default -> minDocID + (1 << rangeLog) - 1;
-            };
-          }
-
-          @Override
-          public long minValue(int level) {
-            int d = doc % 1024;
-            if (d < 128) {
-              return queryMin;
-            } else if (d < 256) {
-              return queryMax + 1;
-            } else if (d < 768) {
-              return queryMin - 1;
-            } else {
-              return queryMin - 1;
-            }
-          }
-
-          @Override
-          public long maxValue(int level) {
-            int d = doc % 1024;
-            if (d < 128) {
-              return queryMax;
-            } else if (d < 256) {
-              return queryMax + 1;
-            } else if (d < 768) {
-              return queryMin - 1;
-            } else {
-              return queryMax + 1;
-            }
-          }
-
-          @Override
-          public int docCount(int level) {
-            int rangeLog = 9 - numLevels() + level;
-
-            if (doc < 1024) {
-              return 1 << rangeLog;
-            } else {
-              // half docs have a value
-              return 1 << rangeLog >> 1;
-            }
-          }
-
-          @Override
-          public long minValue() {
-            return Long.MIN_VALUE;
-          }
-
-          @Override
-          public long maxValue() {
-            return Long.MAX_VALUE;
-          }
-
-          @Override
-          public int docCount() {
-            return 1024 + 1024 / 2;
-          }
-        };
-
-    DocValuesRangeIterator rangeIterator =
-        new DocValuesRangeIterator(twoPhase, skipper, queryMin, queryMax);
-    DocValuesRangeIterator.Approximation rangeApproximation =
-        (DocValuesRangeIterator.Approximation) rangeIterator.approximation();
-
-    assertEquals(100, rangeApproximation.advance(100));
-    assertEquals(DocValuesRangeIterator.Match.YES, rangeApproximation.match);
-    assertEquals(255, rangeApproximation.upTo);
-    assertTrue(rangeIterator.matches());
-    assertTrue(values.docID() < rangeApproximation.docID()); // we did not advance doc values
-    assertFalse(twoPhaseCalled.get());
-
-    assertEquals(768, rangeApproximation.advance(300));
-    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximation.match);
-    if (doLevels) {
-      assertEquals(831, rangeApproximation.upTo);
-    } else {
-      assertEquals(1023, rangeApproximation.upTo);
-    }
-    for (int i = 0; i < 10; ++i) {
-      assertEquals(values.docID(), rangeApproximation.docID());
-      assertEquals(twoPhase.matches(), rangeIterator.matches());
-      assertTrue(twoPhaseCalled.get());
-      twoPhaseCalled.set(false);
-      rangeApproximation.nextDoc();
-    }
-
-    assertEquals(1100, rangeApproximation.advance(1099));
-    assertEquals(DocValuesRangeIterator.Match.IF_DOC_HAS_VALUE, rangeApproximation.match);
-    assertEquals(1024 + 256 - 1, rangeApproximation.upTo);
-    assertEquals(values.docID(), rangeApproximation.docID());
-    assertTrue(rangeIterator.matches());
-    assertFalse(twoPhaseCalled.get());
-
-    assertEquals(1024 + 768, rangeApproximation.advance(1024 + 300));
-    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximation.match);
-    if (doLevels) {
-      assertEquals(1024 + 831, rangeApproximation.upTo);
-    } else {
-      assertEquals(2047, rangeApproximation.upTo);
-    }
-    for (int i = 0; i < 10; ++i) {
-      assertEquals(values.docID(), rangeApproximation.docID());
-      assertEquals(twoPhase.matches(), rangeIterator.matches());
-      assertTrue(twoPhaseCalled.get());
-      twoPhaseCalled.set(false);
-      rangeApproximation.nextDoc();
-    }
-
-    assertEquals(DocIdSetIterator.NO_MORE_DOCS, rangeApproximation.advance(2048));
-  }
-}
@@ -0,0 +1,332 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.lucene.index.DocValuesSkipper;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.tests.util.LuceneTestCase;
+
+public class TestDocValuesRangeIterator extends LuceneTestCase {
+
+  public void testSingleLevel() throws IOException {
+    doTestBasics(false);
+  }
+
+  public void testMultipleLevels() throws IOException {
+    doTestBasics(true);
+  }
+
+  private void doTestBasics(boolean doLevels) throws IOException {
+    long queryMin = 10;
+    long queryMax = 20;
+
+    // Test with both gaps and no-gaps in the ranges:
+    NumericDocValues values = docValues(queryMin, queryMax);
+    NumericDocValues values2 = docValues(queryMin, queryMax);
+
+    AtomicBoolean twoPhaseCalled = new AtomicBoolean();
+    TwoPhaseIterator twoPhase = twoPhaseIterator(values, queryMin, queryMax, twoPhaseCalled);
+    AtomicBoolean twoPhaseCalled2 = new AtomicBoolean();
+    TwoPhaseIterator twoPhase2 = twoPhaseIterator(values2, queryMin, queryMax, twoPhaseCalled2);
+
+    DocValuesSkipper skipper = docValuesSkipper(queryMin, queryMax, doLevels);
+    DocValuesSkipper skipper2 = docValuesSkipper(queryMin, queryMax, doLevels);
+
+    DocValuesRangeIterator rangeIterator =
+        new DocValuesRangeIterator(twoPhase, skipper, queryMin, queryMax, false);
+    DocValuesRangeIterator rangeIteratorWithGaps =
+        new DocValuesRangeIterator(twoPhase2, skipper2, queryMin, queryMax, true);
+    DocValuesRangeIterator.Approximation rangeApproximation =
+        (DocValuesRangeIterator.Approximation) rangeIterator.approximation();
+    DocValuesRangeIterator.Approximation rangeApproximationWithGaps =
+        (DocValuesRangeIterator.Approximation) rangeIteratorWithGaps.approximation();
+
+    assertEquals(100, rangeApproximation.advance(100));
+    assertEquals(100, rangeApproximationWithGaps.advance(100));
+    assertEquals(DocValuesRangeIterator.Match.YES, rangeApproximation.match);
+    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximationWithGaps.match);
+    assertEquals(255, rangeApproximation.upTo);
+    if (doLevels) {
+      assertEquals(127, rangeApproximationWithGaps.upTo);
+    } else {
+      assertEquals(255, rangeApproximationWithGaps.upTo);
+    }
+    assertTrue(rangeIterator.matches());
+    assertTrue(rangeIteratorWithGaps.matches());
+    assertTrue(values.docID() < rangeApproximation.docID()); // we did not advance doc values
+    assertEquals(
+        values2.docID(), rangeApproximationWithGaps.docID()); // we _did_ advance doc values
+    assertFalse(twoPhaseCalled.get());
+    assertTrue(twoPhaseCalled2.get());
+    twoPhaseCalled2.set(false);
+
+    assertEquals(768, rangeApproximation.advance(300));
+    assertEquals(768, rangeApproximationWithGaps.advance(300));
+    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximation.match);
+    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximationWithGaps.match);
+    if (doLevels) {
+      assertEquals(831, rangeApproximation.upTo);
+      assertEquals(831, rangeApproximationWithGaps.upTo);
+    } else {
+      assertEquals(1023, rangeApproximation.upTo);
+      assertEquals(1023, rangeApproximationWithGaps.upTo);
+    }
+    for (int i = 0; i < 10; ++i) {
+      assertEquals(values.docID(), rangeApproximation.docID());
+      assertEquals(values2.docID(), rangeApproximationWithGaps.docID());
+      assertEquals(twoPhase.matches(), rangeIterator.matches());
+      assertEquals(twoPhase2.matches(), rangeIteratorWithGaps.matches());
+      assertTrue(twoPhaseCalled.get());
+      assertTrue(twoPhaseCalled2.get());
+      twoPhaseCalled.set(false);
+      twoPhaseCalled2.set(false);
+      rangeApproximation.nextDoc();
+      rangeApproximationWithGaps.nextDoc();
+    }
+
+    assertEquals(1100, rangeApproximation.advance(1099));
+    assertEquals(1100, rangeApproximationWithGaps.advance(1099));
+    assertEquals(DocValuesRangeIterator.Match.IF_DOC_HAS_VALUE, rangeApproximation.match);
+    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximationWithGaps.match);
+    assertEquals(1024 + 256 - 1, rangeApproximation.upTo);
+    if (doLevels) {
+      assertEquals(1024 + 128 - 1, rangeApproximationWithGaps.upTo);
+    } else {
+      assertEquals(1024 + 256 - 1, rangeApproximationWithGaps.upTo);
+    }
+    assertEquals(values.docID(), rangeApproximation.docID());
+    assertEquals(values2.docID(), rangeApproximationWithGaps.docID());
+    assertTrue(rangeIterator.matches());
+    assertTrue(rangeIteratorWithGaps.matches());
+    assertFalse(twoPhaseCalled.get());
+    assertTrue(twoPhaseCalled2.get());
+    twoPhaseCalled2.set(false);
+
+    assertEquals(1024 + 768, rangeApproximation.advance(1024 + 300));
+    assertEquals(1024 + 768, rangeApproximationWithGaps.advance(1024 + 300));
+    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximation.match);
+    assertEquals(DocValuesRangeIterator.Match.MAYBE, rangeApproximationWithGaps.match);
+    if (doLevels) {
+      assertEquals(1024 + 831, rangeApproximation.upTo);
+      assertEquals(1024 + 831, rangeApproximationWithGaps.upTo);
+    } else {
+      assertEquals(2047, rangeApproximation.upTo);
+      assertEquals(2047, rangeApproximationWithGaps.upTo);
+    }
+    for (int i = 0; i < 10; ++i) {
+      assertEquals(values.docID(), rangeApproximation.docID());
+      assertEquals(values2.docID(), rangeApproximationWithGaps.docID());
+      assertEquals(twoPhase.matches(), rangeIterator.matches());
+      assertEquals(twoPhase2.matches(), rangeIteratorWithGaps.matches());
+      assertTrue(twoPhaseCalled.get());
+      assertTrue(twoPhaseCalled2.get());
+      twoPhaseCalled.set(false);
+      twoPhaseCalled2.set(false);
+      rangeApproximation.nextDoc();
+      rangeApproximationWithGaps.nextDoc();
+    }
+
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, rangeApproximation.advance(2048));
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, rangeApproximationWithGaps.advance(2048));
+  }
+
+  // Fake numeric doc values so that:
+  // docs 0-256 all match
+  // docs in 256-512 are all greater than queryMax
+  // docs in 512-768 are all less than queryMin
+  // docs in 768-1024 have some docs that match the range, others not
+  // docs in 1024-2048 follow a similar pattern as docs in 0-1024 except that not all docs have a
+  // value
+  private static NumericDocValues docValues(long queryMin, long queryMax) {
+    return new NumericDocValues() {
+
+      int doc = -1;
+
+      @Override
+      public boolean advanceExact(int target) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public int docID() {
+        return doc;
+      }
+
+      @Override
+      public int nextDoc() throws IOException {
+        return advance(doc + 1);
+      }
+
+      @Override
+      public int advance(int target) throws IOException {
+        if (target < 1024) {
+          // dense up to 1024
+          return doc = target;
+        } else if (doc < 2047) {
+          // 50% docs have a value up to 2048
+          return doc = target + (target & 1);
+        } else {
+          return doc = DocIdSetIterator.NO_MORE_DOCS;
+        }
+      }
+
+      @Override
+      public long longValue() throws IOException {
+        int d = doc % 1024;
+        if (d < 128) {
+          return (queryMin + queryMax) >> 1;
+        } else if (d < 256) {
+          return queryMax + 1;
+        } else if (d < 512) {
+          return queryMin - 1;
+        } else {
+          return switch ((d / 2) % 3) {
+            case 0 -> queryMin - 1;
+            case 1 -> queryMax + 1;
+            case 2 -> (queryMin + queryMax) >> 1;
+            default -> throw new AssertionError();
+          };
+        }
+      }
+
+      @Override
+      public long cost() {
+        return 42;
+      }
+    };
+  }
+
+  private static TwoPhaseIterator twoPhaseIterator(
+      NumericDocValues values, long queryMin, long queryMax, AtomicBoolean twoPhaseCalled) {
+    return new TwoPhaseIterator(values) {
+
+      @Override
+      public boolean matches() throws IOException {
+        twoPhaseCalled.set(true);
+        long v = values.longValue();
+        return v >= queryMin && v <= queryMax;
+      }
+
+      @Override
+      public float matchCost() {
+        return 2f; // 2 comparisons
+      }
+    };
+  }
+
+  private static DocValuesSkipper docValuesSkipper(long queryMin, long queryMax, boolean doLevels) {
+    return new DocValuesSkipper() {
+
+      int doc = -1;
+
+      @Override
+      public void advance(int target) throws IOException {
+        doc = target;
+      }
+
+      @Override
+      public int numLevels() {
+        return doLevels ? 3 : 1;
+      }
+
+      @Override
+      public int minDocID(int level) {
+        int rangeLog = 9 - numLevels() + level;
+
+        // the level is the log2 of the interval
+        if (doc < 0) {
+          return -1;
+        } else if (doc >= 2048) {
+          return DocIdSetIterator.NO_MORE_DOCS;
+        } else {
+          int mask = (1 << rangeLog) - 1;
+          // prior multiple of 2^level
+          return doc & ~mask;
+        }
+      }
+
+      @Override
+      public int maxDocID(int level) {
+        int rangeLog = 9 - numLevels() + level;
+
+        int minDocID = minDocID(level);
+        return switch (minDocID) {
+          case -1 -> -1;
+          case DocIdSetIterator.NO_MORE_DOCS -> DocIdSetIterator.NO_MORE_DOCS;
+          default -> minDocID + (1 << rangeLog) - 1;
+        };
+      }
+
+      @Override
+      public long minValue(int level) {
+        int d = doc % 1024;
+        if (d < 128) {
+          return queryMin;
+        } else if (d < 256) {
+          return queryMax + 1;
+        } else if (d < 768) {
+          return queryMin - 1;
+        } else {
+          return queryMin - 1;
+        }
+      }
+
+      @Override
+      public long maxValue(int level) {
+        int d = doc % 1024;
+        if (d < 128) {
+          return queryMax;
+        } else if (d < 256) {
+          return queryMax + 1;
+        } else if (d < 768) {
+          return queryMin - 1;
+        } else {
+          return queryMax + 1;
+        }
+      }
+
+      @Override
+      public int docCount(int level) {
+        int rangeLog = 9 - numLevels() + level;
+
+        if (doc < 1024) {
+          return 1 << rangeLog;
+        } else {
+          // half docs have a value
+          return 1 << rangeLog >> 1;
+        }
+      }
+
+      @Override
+      public long minValue() {
+        return Long.MIN_VALUE;
+      }
+
+      @Override
+      public long maxValue() {
+        return Long.MAX_VALUE;
+      }
+
+      @Override
+      public int docCount() {
+        return 1024 + 1024 / 2;
+      }
+    };
+  }
+}
@@ -41,8 +41,7 @@ import org.apache.lucene.util.automaton.RegExp;

 /** Tests the DocValuesRewriteMethod */
 public class TestDocValuesRewriteMethod extends LuceneTestCase {
-  protected IndexSearcher searcher1;
-  protected IndexSearcher searcher2;
+  protected IndexSearcher searcher;
   private IndexReader reader;
   private Directory dir;
   protected String fieldName;
@@ -69,6 +68,7 @@ public class TestDocValuesRewriteMethod extends LuceneTestCase {
         String s = TestUtil.randomUnicodeString(random());
         doc.add(newStringField(fieldName, s, Field.Store.NO));
         doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(s)));
+        doc.add(SortedSetDocValuesField.indexedField(fieldName + "_with-skip", new BytesRef(s)));
         terms.add(s);
       }
       writer.addDocument(doc);
@@ -89,8 +89,7 @@ public class TestDocValuesRewriteMethod extends LuceneTestCase {
     }

     reader = writer.getReader();
-    searcher1 = newSearcher(reader);
-    searcher2 = newSearcher(reader);
+    searcher = newSearcher(reader);
     writer.close();
   }

@@ -123,12 +122,22 @@ public class TestDocValuesRewriteMethod extends LuceneTestCase {
             name -> null,
             Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
             new DocValuesRewriteMethod());
+    RegexpQuery docValuesWithSkip =
+        new RegexpQuery(
+            new Term(fieldName + "_with-skip", regexp),
+            RegExp.NONE,
+            0,
+            name -> null,
+            Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
+            new DocValuesRewriteMethod());
     RegexpQuery inverted = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);

-    TopDocs invertedDocs = searcher1.search(inverted, 25);
-    TopDocs docValuesDocs = searcher2.search(docValues, 25);
+    TopDocs invertedDocs = searcher.search(inverted, 25);
+    TopDocs docValuesDocs = searcher.search(docValues, 25);
+    TopDocs docValuesWithSkipDocs = searcher.search(docValuesWithSkip, 25);

     CheckHits.checkEqual(inverted, invertedDocs.scoreDocs, docValuesDocs.scoreDocs);
+    CheckHits.checkEqual(inverted, invertedDocs.scoreDocs, docValuesWithSkipDocs.scoreDocs);
   }

   public void testEquals() throws Exception {
@@ -31,6 +31,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.KeywordField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.FilterDirectoryReader;
@@ -119,11 +120,14 @@ public class TestTermInSetQuery extends LuceneTestCase {
     }
     Directory dir = newDirectory();
     RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
-    final int numDocs = atLeast(100);
+    final int numDocs = atLeast(10_000);
     for (int i = 0; i < numDocs; ++i) {
       Document doc = new Document();
       final BytesRef term = allTerms.get(random().nextInt(allTerms.size()));
       doc.add(new StringField(field, term, Store.NO));
+      // Also include a doc values field with a skip-list so we can test doc-value rewrite as
+      // well:
+      doc.add(SortedSetDocValuesField.indexedField(field, term));
       iw.addDocument(doc);
     }
     if (numTerms > 1 && random().nextBoolean()) {
@@ -154,7 +158,9 @@ public class TestTermInSetQuery extends LuceneTestCase {
       }
       final Query q1 = new ConstantScoreQuery(bq.build());
       final Query q2 = new TermInSetQuery(field, queryTerms);
+      final Query q3 = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, field, queryTerms);
       assertSameMatches(searcher, new BoostQuery(q1, boost), new BoostQuery(q2, boost), true);
+      assertSameMatches(searcher, new BoostQuery(q1, boost), new BoostQuery(q3, boost), false);
     }

     reader.close();
@@ -225,6 +231,53 @@ public class TestTermInSetQuery extends LuceneTestCase {
     }
   }

+  /**
+   * Make sure the doc values skipper optimization doesn't incorrectly assume that the terms of a
+   * TermInSetQuery form a continuous range between the query's min and max term (they may have
+   * gaps).
+   */
+  public void testSkipperOptimizationGapAssumption() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    // Index the first 10,000 docs all with the term "b" to get some skip list blocks with the
+    // range [b, b]:
+    for (int i = 0; i < 10_000; i++) {
+      Document doc = new Document();
+      BytesRef term = new BytesRef("b");
+      doc.add(new SortedSetDocValuesField("field", term));
+      doc.add(SortedSetDocValuesField.indexedField("idx_field", term));
+      iw.addDocument(doc);
+    }
+
+    // Index a couple more docs with terms "a" and "c":
+    Document doc = new Document();
+    BytesRef term = new BytesRef("a");
+    doc.add(new SortedSetDocValuesField("field", term));
+    doc.add(SortedSetDocValuesField.indexedField("idx_field", term));
+    iw.addDocument(doc);
+    doc = new Document();
+    term = new BytesRef("c");
+    doc.add(new SortedSetDocValuesField("field", term));
+    doc.add(SortedSetDocValuesField.indexedField("idx_field", term));
+    iw.addDocument(doc);
+
+    iw.commit();
+    IndexReader reader = iw.getReader();
+    IndexSearcher searcher = newSearcher(reader);
+    iw.close();
+
+    // Our query is for (or "a" "c"), which should use a skip-list optimization to exclude blocks
+    // of documents that fall outside the range [a, c]. We want to test that it doesn't
+    // incorrectly do the inverse and include all docs in a block that fall within [a, c] (which
+    // is why we have blocks of only "b" docs up-front):
+    List<BytesRef> queryTerms = List.of(new BytesRef("a"), new BytesRef("c"));
+    Query q1 = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", queryTerms);
+    Query q2 = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "idx_field", queryTerms);
+    assertSameMatches(searcher, q1, q2, false);
+
+    reader.close();
+    dir.close();
+  }
+
   private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores)
       throws IOException {
     final int maxDoc = searcher.getIndexReader().maxDoc();