LUCENE-7051: Remove the "estimate match count" optimization from point queries.

Adrien Grand 2016-02-29 10:04:17 +01:00
parent 5f9db01833
commit 46d05afdae
5 changed files with 9 additions and 48 deletions
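
Background on the change: each point query used to thread an int[1] counter through its
IntersectVisitor and pass the total to DocIdSetBuilder.build(long) as a cost hint for the
resulting iterator. A multi-valued document is visited once per matching value, so the
counter over-estimates the number of distinct hits; the queries now call the plain build()
and let the DocIdSet report its own cost. A minimal sketch of the over-count (class name
hypothetical; assumes build() dedups, which both the bit-set and sorted-buffer paths do):

    import java.io.IOException;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.DocIdSetBuilder;

    public class HitCountOverEstimate {
      public static void main(String[] args) throws IOException {
        DocIdSetBuilder result = new DocIdSetBuilder(10); // normally reader.maxDoc()
        int[] hitCount = new int[1];
        // Doc 7 is multi-valued: three of its points match, so visit(docID) fires three times.
        for (int i = 0; i < 3; i++) {
          hitCount[0]++;
          result.add(7);
        }
        DocIdSet set = result.build(); // the removed overload was build(hitCount[0])
        int distinct = 0;
        DocIdSetIterator it = set.iterator();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
          distinct++;
        }
        System.out.println(hitCount[0] + " visits, " + distinct + " matching doc"); // 3 visits, 1 matching doc
      }
    }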

PointInSetQuery.java

@@ -19,7 +19,6 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Objects;
 import org.apache.lucene.document.BinaryPoint;
 import org.apache.lucene.document.DoublePoint;
@@ -27,7 +26,6 @@ import org.apache.lucene.document.FloatPoint;
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PointValues.IntersectVisitor;
@@ -39,8 +37,6 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.DocIdSetBuilder;
-import org.apache.lucene.util.NumericUtils;
-import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;

 /**
@@ -138,18 +134,16 @@ public abstract class PointInSetQuery extends Query {
         DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
-        int[] hitCount = new int[1];
         if (numDims == 1) {
           // We optimize this common case, effectively doing a merge sort of the indexed values vs the queried set:
-          values.intersect(field, new MergePointVisitor(sortedPackedPoints, hitCount, result));
+          values.intersect(field, new MergePointVisitor(sortedPackedPoints, result));
         } else {
           // NOTE: this is naive implementation, where for each point we re-walk the KD tree to intersect. We could instead do a similar
           // optimization as the 1D case, but I think it'd mean building a query-time KD tree so we could efficiently intersect against the
           // index, which is probably tricky!
-          SinglePointVisitor visitor = new SinglePointVisitor(hitCount, result);
+          SinglePointVisitor visitor = new SinglePointVisitor(result);
           TermIterator iterator = sortedPackedPoints.iterator();
           for (BytesRef point = iterator.next(); point != null; point = iterator.next()) {
             visitor.setPoint(point);
@@ -157,8 +151,7 @@ public abstract class PointInSetQuery extends Query {
           }
         }
-        // NOTE: hitCount[0] will be over-estimate in multi-valued case
-        return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
+        return new ConstantScoreScorer(this, score(), result.build().iterator());
       }
     };
   }
@@ -168,15 +161,13 @@ public abstract class PointInSetQuery extends Query {
   private class MergePointVisitor implements IntersectVisitor {
     private final DocIdSetBuilder result;
-    private final int[] hitCount;
     private TermIterator iterator;
     private BytesRef nextQueryPoint;
     private final byte[] lastMaxPackedValue;
     private final BytesRef scratch = new BytesRef();
     private final PrefixCodedTerms sortedPackedPoints;
-    public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, int[] hitCount, DocIdSetBuilder result) throws IOException {
-      this.hitCount = hitCount;
+    public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, DocIdSetBuilder result) throws IOException {
       this.result = result;
       this.sortedPackedPoints = sortedPackedPoints;
       lastMaxPackedValue = new byte[bytesPerDim];
@@ -196,7 +187,6 @@ public abstract class PointInSetQuery extends Query {
     @Override
     public void visit(int docID) {
-      hitCount[0]++;
       result.add(docID);
     }
@@ -207,7 +197,6 @@ public abstract class PointInSetQuery extends Query {
         int cmp = nextQueryPoint.compareTo(scratch);
         if (cmp == 0) {
           // Query point equals index point, so collect and return
-          hitCount[0]++;
           result.add(docID);
           break;
         } else if (cmp < 0) {
@@ -264,11 +253,9 @@ public abstract class PointInSetQuery extends Query {
   private class SinglePointVisitor implements IntersectVisitor {
     private final DocIdSetBuilder result;
-    private final int[] hitCount;
     private final byte[] pointBytes;
-    public SinglePointVisitor(int[] hitCount, DocIdSetBuilder result) {
-      this.hitCount = hitCount;
+    public SinglePointVisitor(DocIdSetBuilder result) {
       this.result = result;
       this.pointBytes = new byte[bytesPerDim * numDims];
     }
@@ -286,7 +273,6 @@ public abstract class PointInSetQuery extends Query {
     @Override
     public void visit(int docID) {
-      hitCount[0]++;
       result.add(docID);
     }
@@ -295,7 +281,6 @@ public abstract class PointInSetQuery extends Query {
       assert packedValue.length == pointBytes.length;
       if (Arrays.equals(packedValue, pointBytes)) {
         // The point for this doc matches the point we are querying on
-        hitCount[0]++;
         result.add(docID);
       }
     }
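
The comment in the 1D branch above describes a classic merge: the queried set (a sorted
PrefixCodedTerms) and the indexed values both arrive in sorted order, so MergePointVisitor
only ever advances whichever side is behind. A self-contained sketch of that merge step,
independent of the Lucene API (the class, method, and comparison helper are illustration
only):

    import java.util.ArrayList;
    import java.util.List;

    class MergeIntersectSketch {
      /** Unsigned lexicographic comparison of two equal-length byte[] values. */
      static int compareUnsigned(byte[] a, byte[] b) {
        for (int i = 0; i < a.length; i++) {
          int cmp = (a[i] & 0xFF) - (b[i] & 0xFF);
          if (cmp != 0) {
            return cmp;
          }
        }
        return 0;
      }

      /** Collects values present in both sorted lists, advancing whichever side is behind. */
      static List<byte[]> intersectSorted(List<byte[]> queryPoints, List<byte[]> indexValues) {
        List<byte[]> hits = new ArrayList<>();
        int q = 0, v = 0;
        while (q < queryPoints.size() && v < indexValues.size()) {
          int cmp = compareUnsigned(queryPoints.get(q), indexValues.get(v));
          if (cmp == 0) {
            hits.add(indexValues.get(v));
            v++; // keep the query point: later index values (multi-valued docs) may equal it too
          } else if (cmp < 0) {
            q++; // query point is behind the current index value
          } else {
            v++; // index value is behind the current query point
          }
        }
        return hits;
      }
    }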

PointRangeQuery.java

@@ -221,7 +221,6 @@ public abstract class PointRangeQuery extends Query {
         DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
-        int[] hitCount = new int[1];
         values.intersect(field,
             new IntersectVisitor() {
@@ -232,7 +231,6 @@ public abstract class PointRangeQuery extends Query {
               @Override
               public void visit(int docID) {
-                hitCount[0]++;
                 result.add(docID);
               }
@@ -251,7 +249,6 @@ public abstract class PointRangeQuery extends Query {
                 }
                 // Doc is in-bounds
-                hitCount[0]++;
                 result.add(docID);
               }
@@ -280,8 +277,7 @@ public abstract class PointRangeQuery extends Query {
               }
             });
-        // NOTE: hitCount[0] will be over-estimate in multi-valued case
-        return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
+        return new ConstantScoreScorer(this, score(), result.build().iterator());
       }
     };
   }
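
For reference, the "Doc is in-bounds" decision that survives above boils down to an unsigned
per-dimension byte comparison of the packed value against the packed lower and upper bounds.
A hedged sketch, not the PointRangeQuery code itself (it assumes the usual layout of numDims
contiguous blocks of bytesPerDim bytes):

    class PackedBoundsCheck {
      /** True if packedValue lies within [lowerPoint, upperPoint] in every dimension. */
      static boolean matches(byte[] packedValue, byte[] lowerPoint, byte[] upperPoint,
                             int numDims, int bytesPerDim) {
        for (int dim = 0; dim < numDims; dim++) {
          int offset = dim * bytesPerDim;
          if (compareUnsigned(packedValue, lowerPoint, offset, bytesPerDim) < 0) {
            return false; // below the lower bound in this dimension
          }
          if (compareUnsigned(packedValue, upperPoint, offset, bytesPerDim) > 0) {
            return false; // above the upper bound in this dimension
          }
        }
        return true; // doc is in-bounds
      }

      /** Unsigned lexicographic comparison of len bytes starting at offset. */
      static int compareUnsigned(byte[] a, byte[] b, int offset, int len) {
        for (int i = offset; i < offset + len; i++) {
          int cmp = (a[i] & 0xFF) - (b[i] & 0xFF);
          if (cmp != 0) {
            return cmp;
          }
        }
        return 0;
      }
    }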

DocIdSetBuilder.java

@@ -169,21 +169,9 @@ public final class DocIdSetBuilder {
    * Build a {@link DocIdSet} from the accumulated doc IDs.
    */
   public DocIdSet build() {
-    return build(-1);
-  }
-
-  /**
-   * Expert: build a {@link DocIdSet} with a hint on the cost that the resulting
-   * {@link DocIdSet} would have.
-   */
-  public DocIdSet build(long costHint) {
     try {
       if (bitSet != null) {
-        if (costHint == -1) {
-          return new BitDocIdSet(bitSet);
-        } else {
-          return new BitDocIdSet(bitSet, costHint);
-        }
+        return new BitDocIdSet(bitSet);
       } else {
         LSBRadixSorter sorter = new LSBRadixSorter();
         sorter.sort(buffer, 0, bufferSize);
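
With the costHint overload gone, the cost estimate lives in the DocIdSet implementations
themselves: the sorted-buffer path knows its exact length after deduplication, and the
one-argument BitDocIdSet constructor derives a cost from the bit set itself (its approximate
cardinality, if memory serves). Callers that still want the estimate read it off the
iterator; a small hedged usage sketch (class and method names are illustration only):

    import java.io.IOException;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.DocIdSetBuilder;

    class BuildCostExample {
      /** maxDoc would normally come from reader.maxDoc(). */
      static long matchCostEstimate(int maxDoc) throws IOException {
        DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
        builder.add(3);
        builder.add(9);
        DocIdSetIterator it = builder.build().iterator();
        return it.cost(); // the estimate formerly supplied via the caller's hitCount hint
      }
    }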

PointInPolygonQuery.java

@@ -111,12 +111,10 @@ public class PointInPolygonQuery extends Query {
         }
         DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
-        int[] hitCount = new int[1];
         values.intersect(field,
             new IntersectVisitor() {
               @Override
               public void visit(int docID) {
-                hitCount[0]++;
                 result.add(docID);
               }
@@ -126,7 +124,6 @@ public class PointInPolygonQuery extends Query {
                 double lat = LatLonPoint.decodeLat(NumericUtils.bytesToInt(packedValue, 0));
                 double lon = LatLonPoint.decodeLon(NumericUtils.bytesToInt(packedValue, Integer.BYTES));
                 if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, lat, lon)) {
-                  hitCount[0]++;
                   result.add(docID);
                 }
               }
@@ -155,8 +152,7 @@ public class PointInPolygonQuery extends Query {
               }
             });
-        // NOTE: hitCount[0] will be over-estimate in multi-valued case
-        return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
+        return new ConstantScoreScorer(this, score(), result.build().iterator());
       }
     };
   }
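
GeoRelationUtils.pointInPolygon above is, in essence, the even-odd (ray casting) test. A
generic sketch of that algorithm for the same parallel lon/lat arrays; the real utility may
differ in details such as boundary handling:

    class RayCastingSketch {
      /** Even-odd rule: toggle on every polygon edge crossed by a ray cast from the point. */
      static boolean pointInPolygon(double[] polyLons, double[] polyLats, double lat, double lon) {
        boolean inside = false;
        for (int i = 0, j = polyLats.length - 1; i < polyLats.length; j = i++) {
          if ((polyLats[i] > lat) != (polyLats[j] > lat)
              && lon < (polyLons[j] - polyLons[i]) * (lat - polyLats[i]) / (polyLats[j] - polyLats[i]) + polyLons[i]) {
            inside = !inside; // this edge crosses the ray
          }
        }
        return inside;
      }
    }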

PointInGeo3DShapeQuery.java

@@ -92,14 +92,12 @@ class PointInGeo3DShapeQuery extends Query {
         DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
-        int[] hitCount = new int[1];
         values.intersect(field,
             new IntersectVisitor() {
               @Override
               public void visit(int docID) {
                 result.add(docID);
-                hitCount[0]++;
               }
               @Override
@@ -110,7 +108,6 @@ class PointInGeo3DShapeQuery extends Query {
                 double z = Geo3DPoint.decodeDimension(planetModel, packedValue, 2 * Integer.BYTES);
                 if (shape.isWithin(x, y, z)) {
                   result.add(docID);
-                  hitCount[0]++;
                 }
               }
@@ -159,8 +156,7 @@ class PointInGeo3DShapeQuery extends Query {
               }
             });
-        // NOTE: hitCount[0] will be over-estimate in multi-valued case
-        return new ConstantScoreScorer(this, score(), result.build(hitCount[0]).iterator());
+        return new ConstantScoreScorer(this, score(), result.build().iterator());
       }
     };
   }