mirror of https://github.com/apache/lucene.git
LUCENE-7051: Remove the "estimate match count" optimization from point queries.
This commit is contained in:
parent
5f9db01833
commit
46d05afdae
|
@ -19,7 +19,6 @@ package org.apache.lucene.search;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.document.BinaryPoint;
|
||||
import org.apache.lucene.document.DoublePoint;
|
||||
|
@ -27,7 +26,6 @@ import org.apache.lucene.document.FloatPoint;
|
|||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.LongPoint;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||
|
@ -39,8 +37,6 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/**
|
||||
|
@ -138,18 +134,16 @@ public abstract class PointInSetQuery extends Query {
|
|||
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
||||
|
||||
int[] hitCount = new int[1];
|
||||
|
||||
if (numDims == 1) {
|
||||
|
||||
// We optimize this common case, effectively doing a merge sort of the indexed values vs the queried set:
|
||||
values.intersect(field, new MergePointVisitor(sortedPackedPoints, hitCount, result));
|
||||
values.intersect(field, new MergePointVisitor(sortedPackedPoints, result));
|
||||
|
||||
} else {
|
||||
// NOTE: this is naive implementation, where for each point we re-walk the KD tree to intersect. We could instead do a similar
|
||||
// optimization as the 1D case, but I think it'd mean building a query-time KD tree so we could efficiently intersect against the
|
||||
// index, which is probably tricky!
|
||||
SinglePointVisitor visitor = new SinglePointVisitor(hitCount, result);
|
||||
SinglePointVisitor visitor = new SinglePointVisitor(result);
|
||||
TermIterator iterator = sortedPackedPoints.iterator();
|
||||
for (BytesRef point = iterator.next(); point != null; point = iterator.next()) {
|
||||
visitor.setPoint(point);
|
||||
|
@ -157,8 +151,7 @@ public abstract class PointInSetQuery extends Query {
|
|||
}
|
||||
}
|
||||
|
||||
// NOTE: hitCount[0] will be over-estimate in multi-valued case
|
||||
return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
|
||||
return new ConstantScoreScorer(this, score(), result.build().iterator());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -168,15 +161,13 @@ public abstract class PointInSetQuery extends Query {
|
|||
private class MergePointVisitor implements IntersectVisitor {
|
||||
|
||||
private final DocIdSetBuilder result;
|
||||
private final int[] hitCount;
|
||||
private TermIterator iterator;
|
||||
private BytesRef nextQueryPoint;
|
||||
private final byte[] lastMaxPackedValue;
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
private final PrefixCodedTerms sortedPackedPoints;
|
||||
|
||||
public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, int[] hitCount, DocIdSetBuilder result) throws IOException {
|
||||
this.hitCount = hitCount;
|
||||
public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, DocIdSetBuilder result) throws IOException {
|
||||
this.result = result;
|
||||
this.sortedPackedPoints = sortedPackedPoints;
|
||||
lastMaxPackedValue = new byte[bytesPerDim];
|
||||
|
@ -196,7 +187,6 @@ public abstract class PointInSetQuery extends Query {
|
|||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
hitCount[0]++;
|
||||
result.add(docID);
|
||||
}
|
||||
|
||||
|
@ -207,7 +197,6 @@ public abstract class PointInSetQuery extends Query {
|
|||
int cmp = nextQueryPoint.compareTo(scratch);
|
||||
if (cmp == 0) {
|
||||
// Query point equals index point, so collect and return
|
||||
hitCount[0]++;
|
||||
result.add(docID);
|
||||
break;
|
||||
} else if (cmp < 0) {
|
||||
|
@ -264,11 +253,9 @@ public abstract class PointInSetQuery extends Query {
|
|||
private class SinglePointVisitor implements IntersectVisitor {
|
||||
|
||||
private final DocIdSetBuilder result;
|
||||
private final int[] hitCount;
|
||||
private final byte[] pointBytes;
|
||||
|
||||
public SinglePointVisitor(int[] hitCount, DocIdSetBuilder result) {
|
||||
this.hitCount = hitCount;
|
||||
public SinglePointVisitor(DocIdSetBuilder result) {
|
||||
this.result = result;
|
||||
this.pointBytes = new byte[bytesPerDim * numDims];
|
||||
}
|
||||
|
@ -286,7 +273,6 @@ public abstract class PointInSetQuery extends Query {
|
|||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
hitCount[0]++;
|
||||
result.add(docID);
|
||||
}
|
||||
|
||||
|
@ -295,7 +281,6 @@ public abstract class PointInSetQuery extends Query {
|
|||
assert packedValue.length == pointBytes.length;
|
||||
if (Arrays.equals(packedValue, pointBytes)) {
|
||||
// The point for this doc matches the point we are querying on
|
||||
hitCount[0]++;
|
||||
result.add(docID);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -221,7 +221,6 @@ public abstract class PointRangeQuery extends Query {
|
|||
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
||||
|
||||
int[] hitCount = new int[1];
|
||||
values.intersect(field,
|
||||
new IntersectVisitor() {
|
||||
|
||||
|
@ -232,7 +231,6 @@ public abstract class PointRangeQuery extends Query {
|
|||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
hitCount[0]++;
|
||||
result.add(docID);
|
||||
}
|
||||
|
||||
|
@ -251,7 +249,6 @@ public abstract class PointRangeQuery extends Query {
|
|||
}
|
||||
|
||||
// Doc is in-bounds
|
||||
hitCount[0]++;
|
||||
result.add(docID);
|
||||
}
|
||||
|
||||
|
@ -280,8 +277,7 @@ public abstract class PointRangeQuery extends Query {
|
|||
}
|
||||
});
|
||||
|
||||
// NOTE: hitCount[0] will be over-estimate in multi-valued case
|
||||
return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
|
||||
return new ConstantScoreScorer(this, score(), result.build().iterator());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -169,21 +169,9 @@ public final class DocIdSetBuilder {
|
|||
* Build a {@link DocIdSet} from the accumulated doc IDs.
|
||||
*/
|
||||
public DocIdSet build() {
|
||||
return build(-1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: build a {@link DocIdSet} with a hint on the cost that the resulting
|
||||
* {@link DocIdSet} would have.
|
||||
*/
|
||||
public DocIdSet build(long costHint) {
|
||||
try {
|
||||
if (bitSet != null) {
|
||||
if (costHint == -1) {
|
||||
return new BitDocIdSet(bitSet);
|
||||
} else {
|
||||
return new BitDocIdSet(bitSet, costHint);
|
||||
}
|
||||
} else {
|
||||
LSBRadixSorter sorter = new LSBRadixSorter();
|
||||
sorter.sort(buffer, 0, bufferSize);
|
||||
|
|
|
@ -111,12 +111,10 @@ public class PointInPolygonQuery extends Query {
|
|||
}
|
||||
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
||||
int[] hitCount = new int[1];
|
||||
values.intersect(field,
|
||||
new IntersectVisitor() {
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
hitCount[0]++;
|
||||
result.add(docID);
|
||||
}
|
||||
|
||||
|
@ -126,7 +124,6 @@ public class PointInPolygonQuery extends Query {
|
|||
double lat = LatLonPoint.decodeLat(NumericUtils.bytesToInt(packedValue, 0));
|
||||
double lon = LatLonPoint.decodeLon(NumericUtils.bytesToInt(packedValue, Integer.BYTES));
|
||||
if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, lat, lon)) {
|
||||
hitCount[0]++;
|
||||
result.add(docID);
|
||||
}
|
||||
}
|
||||
|
@ -155,8 +152,7 @@ public class PointInPolygonQuery extends Query {
|
|||
}
|
||||
});
|
||||
|
||||
// NOTE: hitCount[0] will be over-estimate in multi-valued case
|
||||
return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
|
||||
return new ConstantScoreScorer(this, score(), result.build().iterator());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -92,14 +92,12 @@ class PointInGeo3DShapeQuery extends Query {
|
|||
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
||||
|
||||
int[] hitCount = new int[1];
|
||||
values.intersect(field,
|
||||
new IntersectVisitor() {
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
result.add(docID);
|
||||
hitCount[0]++;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -110,7 +108,6 @@ class PointInGeo3DShapeQuery extends Query {
|
|||
double z = Geo3DPoint.decodeDimension(planetModel, packedValue, 2 * Integer.BYTES);
|
||||
if (shape.isWithin(x, y, z)) {
|
||||
result.add(docID);
|
||||
hitCount[0]++;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -159,8 +156,7 @@ class PointInGeo3DShapeQuery extends Query {
|
|||
}
|
||||
});
|
||||
|
||||
// NOTE: hitCount[0] will be over-estimate in multi-valued case
|
||||
return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
|
||||
return new ConstantScoreScorer(this, score(), result.build().iterator());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue