mirror of https://github.com/apache/lucene.git
LUCENE-7254: (sandbox/ only) Don't let abuse cases slow down spatial queries
This commit is contained in:
parent
89857653ca
commit
111107b3bd
|
@ -34,7 +34,6 @@ import org.apache.lucene.search.FieldDoc;
|
|||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.PointRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopFieldDocs;
|
||||
|
@ -229,7 +228,7 @@ public class LatLonPoint extends Field {
|
|||
}
|
||||
|
||||
private static Query newBoxInternal(String field, byte[] min, byte[] max) {
|
||||
return new PointRangeQuery(field, min, max, 2) {
|
||||
return new LatLonPointBoxQuery(field, min, max, 2) {
|
||||
@Override
|
||||
protected String toString(int dimension, byte[] value) {
|
||||
if (dimension == 0) {
|
||||
|
|
|
@ -0,0 +1,287 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.document;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||
import org.apache.lucene.index.PointValues.Relation;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.PointRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/**
|
||||
* Fast version of {@link PointRangeQuery}. It is fast for actual range queries!
|
||||
* @lucene.experimental
|
||||
*/
|
||||
abstract class LatLonPointBoxQuery extends Query {
|
||||
final String field;
|
||||
final int numDims;
|
||||
final int bytesPerDim;
|
||||
final byte[] lowerPoint;
|
||||
final byte[] upperPoint;
|
||||
|
||||
/**
|
||||
* Expert: create a multidimensional range query for point values.
|
||||
*
|
||||
* @param field field name. must not be {@code null}.
|
||||
* @param lowerPoint lower portion of the range (inclusive).
|
||||
* @param upperPoint upper portion of the range (inclusive).
|
||||
* @param numDims number of dimensions.
|
||||
* @throws IllegalArgumentException if {@code field} is null, or if {@code lowerValue.length != upperValue.length}
|
||||
*/
|
||||
protected LatLonPointBoxQuery(String field, byte[] lowerPoint, byte[] upperPoint, int numDims) {
|
||||
checkArgs(field, lowerPoint, upperPoint);
|
||||
this.field = field;
|
||||
if (numDims <= 0) {
|
||||
throw new IllegalArgumentException("numDims must be positive, got " + numDims);
|
||||
}
|
||||
if (lowerPoint.length == 0) {
|
||||
throw new IllegalArgumentException("lowerPoint has length of zero");
|
||||
}
|
||||
if (lowerPoint.length % numDims != 0) {
|
||||
throw new IllegalArgumentException("lowerPoint is not a fixed multiple of numDims");
|
||||
}
|
||||
if (lowerPoint.length != upperPoint.length) {
|
||||
throw new IllegalArgumentException("lowerPoint has length=" + lowerPoint.length + " but upperPoint has different length=" + upperPoint.length);
|
||||
}
|
||||
this.numDims = numDims;
|
||||
this.bytesPerDim = lowerPoint.length / numDims;
|
||||
|
||||
this.lowerPoint = lowerPoint;
|
||||
this.upperPoint = upperPoint;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check preconditions for all factory methods
|
||||
* @throws IllegalArgumentException if {@code field}, {@code lowerPoint} or {@code upperPoint} are null.
|
||||
*/
|
||||
public static void checkArgs(String field, Object lowerPoint, Object upperPoint) {
|
||||
if (field == null) {
|
||||
throw new IllegalArgumentException("field must not be null");
|
||||
}
|
||||
if (lowerPoint == null) {
|
||||
throw new IllegalArgumentException("lowerPoint must not be null");
|
||||
}
|
||||
if (upperPoint == null) {
|
||||
throw new IllegalArgumentException("upperPoint must not be null");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
|
||||
// We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
|
||||
// This is an inverted structure and should be used in the first pass:
|
||||
|
||||
return new ConstantScoreWeight(this) {
|
||||
|
||||
private DocIdSetIterator buildMatchingIterator(LeafReader reader, PointValues values) throws IOException {
|
||||
MatchingPoints result = new MatchingPoints(reader, field);
|
||||
|
||||
values.intersect(field,
|
||||
new IntersectVisitor() {
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
result.add(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
if (StringHelper.compare(bytesPerDim, packedValue, offset, lowerPoint, offset) < 0) {
|
||||
// Doc's value is too low, in this dimension
|
||||
return;
|
||||
}
|
||||
if (StringHelper.compare(bytesPerDim, packedValue, offset, upperPoint, offset) > 0) {
|
||||
// Doc's value is too high, in this dimension
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Doc is in-bounds
|
||||
result.add(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
|
||||
boolean crosses = false;
|
||||
|
||||
for(int dim=0;dim<numDims;dim++) {
|
||||
int offset = dim*bytesPerDim;
|
||||
|
||||
if (StringHelper.compare(bytesPerDim, minPackedValue, offset, upperPoint, offset) > 0 ||
|
||||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, lowerPoint, offset) < 0) {
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
}
|
||||
|
||||
crosses |= StringHelper.compare(bytesPerDim, minPackedValue, offset, lowerPoint, offset) < 0 ||
|
||||
StringHelper.compare(bytesPerDim, maxPackedValue, offset, upperPoint, offset) > 0;
|
||||
}
|
||||
|
||||
if (crosses) {
|
||||
return Relation.CELL_CROSSES_QUERY;
|
||||
} else {
|
||||
return Relation.CELL_INSIDE_QUERY;
|
||||
}
|
||||
}
|
||||
});
|
||||
return result.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
LeafReader reader = context.reader();
|
||||
PointValues values = reader.getPointValues();
|
||||
if (values == null) {
|
||||
// No docs in this segment indexed any points
|
||||
return null;
|
||||
}
|
||||
FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
|
||||
if (fieldInfo == null) {
|
||||
// No docs in this segment indexed this field at all
|
||||
return null;
|
||||
}
|
||||
if (fieldInfo.getPointDimensionCount() != numDims) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numDims=" + fieldInfo.getPointDimensionCount() + " but this query has numDims=" + numDims);
|
||||
}
|
||||
if (bytesPerDim != fieldInfo.getPointNumBytes()) {
|
||||
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + fieldInfo.getPointNumBytes() + " but this query has bytesPerDim=" + bytesPerDim);
|
||||
}
|
||||
|
||||
boolean allDocsMatch;
|
||||
if (values.getDocCount(field) == reader.maxDoc()) {
|
||||
final byte[] fieldPackedLower = values.getMinPackedValue(field);
|
||||
final byte[] fieldPackedUpper = values.getMaxPackedValue(field);
|
||||
allDocsMatch = true;
|
||||
for (int i = 0; i < numDims; ++i) {
|
||||
int offset = i * bytesPerDim;
|
||||
if (StringHelper.compare(bytesPerDim, lowerPoint, offset, fieldPackedLower, offset) > 0
|
||||
|| StringHelper.compare(bytesPerDim, upperPoint, offset, fieldPackedUpper, offset) < 0) {
|
||||
allDocsMatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
allDocsMatch = false;
|
||||
}
|
||||
|
||||
DocIdSetIterator iterator;
|
||||
if (allDocsMatch) {
|
||||
// all docs have a value and all points are within bounds, so everything matches
|
||||
iterator = DocIdSetIterator.all(reader.maxDoc());
|
||||
} else {
|
||||
iterator = buildMatchingIterator(reader, values);
|
||||
}
|
||||
|
||||
return new ConstantScoreScorer(this, score(), iterator);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int hashCode() {
|
||||
int hash = super.hashCode();
|
||||
hash = 31 * hash + field.hashCode();
|
||||
hash = 31 * hash + Arrays.hashCode(lowerPoint);
|
||||
hash = 31 * hash + Arrays.hashCode(upperPoint);
|
||||
hash = 31 * hash + numDims;
|
||||
hash = 31 * hash + Objects.hashCode(bytesPerDim);
|
||||
return hash;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final boolean equals(Object other) {
|
||||
if (super.equals(other) == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
final LatLonPointBoxQuery q = (LatLonPointBoxQuery) other;
|
||||
if (field.equals(q.field) == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (q.numDims != numDims) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (q.bytesPerDim != bytesPerDim) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (Arrays.equals(lowerPoint, q.lowerPoint) == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (Arrays.equals(upperPoint, q.upperPoint) == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String toString(String field) {
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
if (this.field.equals(field) == false) {
|
||||
sb.append(this.field);
|
||||
sb.append(':');
|
||||
}
|
||||
|
||||
// print ourselves as "range per dimension"
|
||||
for (int i = 0; i < numDims; i++) {
|
||||
if (i > 0) {
|
||||
sb.append(',');
|
||||
}
|
||||
|
||||
int startOffset = bytesPerDim * i;
|
||||
|
||||
sb.append('[');
|
||||
sb.append(toString(i, Arrays.copyOfRange(lowerPoint, startOffset, startOffset + bytesPerDim)));
|
||||
sb.append(" TO ");
|
||||
sb.append(toString(i, Arrays.copyOfRange(upperPoint, startOffset, startOffset + bytesPerDim)));
|
||||
sb.append(']');
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string of a single value in a human-readable format for debugging.
|
||||
* This is used by {@link #toString()}.
|
||||
*
|
||||
* @param dimension dimension of the particular value
|
||||
* @param value single value, never null
|
||||
* @return human readable value for debugging
|
||||
*/
|
||||
protected abstract String toString(int dimension, byte[] value);
|
||||
}
|
|
@ -28,13 +28,10 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
|
|||
import org.apache.lucene.index.PointValues.Relation;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.util.SloppyMath;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
@ -120,15 +117,10 @@ final class LatLonPointDistanceQuery extends Query {
|
|||
LatLonPoint.checkCompatible(fieldInfo);
|
||||
|
||||
// matching docids
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
||||
MatchingPoints result = new MatchingPoints(reader, field);
|
||||
|
||||
values.intersect(field,
|
||||
new IntersectVisitor() {
|
||||
@Override
|
||||
public void grow(int count) {
|
||||
result.grow(count);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
result.add(docID);
|
||||
|
@ -209,12 +201,7 @@ final class LatLonPointDistanceQuery extends Query {
|
|||
}
|
||||
});
|
||||
|
||||
DocIdSet set = result.build();
|
||||
final DocIdSetIterator disi = set.iterator();
|
||||
if (disi == null) {
|
||||
return null;
|
||||
}
|
||||
return new ConstantScoreScorer(this, score(), disi);
|
||||
return new ConstantScoreScorer(this, score(), result.iterator());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -24,8 +24,6 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
|
|||
import org.apache.lucene.index.PointValues.Relation;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
|
@ -34,7 +32,6 @@ import org.apache.lucene.index.PointValues;
|
|||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.geo.Polygon;
|
||||
|
@ -113,15 +110,10 @@ final class LatLonPointInPolygonQuery extends Query {
|
|||
LatLonPoint.checkCompatible(fieldInfo);
|
||||
|
||||
// matching docids
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
||||
MatchingPoints result = new MatchingPoints(reader, field);
|
||||
|
||||
values.intersect(field,
|
||||
new IntersectVisitor() {
|
||||
@Override
|
||||
public void grow(int count) {
|
||||
result.grow(count);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
result.add(docID);
|
||||
|
@ -154,13 +146,7 @@ final class LatLonPointInPolygonQuery extends Query {
|
|||
}
|
||||
});
|
||||
|
||||
DocIdSet set = result.build();
|
||||
final DocIdSetIterator disi = set.iterator();
|
||||
if (disi == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return new ConstantScoreScorer(this, score(), disi);
|
||||
return new ConstantScoreScorer(this, score(), result.iterator());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.document;
|
||||
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BitSet;
|
||||
import org.apache.lucene.util.BitSetIterator;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.SparseFixedBitSet;
|
||||
|
||||
/**
|
||||
* Accumulates matching hits for points.
|
||||
* <p>
|
||||
* Add matches with ({@link #add(int)}) and call {@link #iterator()} for
|
||||
* an iterator over the results.
|
||||
* <p>
|
||||
* This implementation currently optimizes bitset structure (sparse vs dense)
|
||||
* and {@link DocIdSetIterator#cost()} (cardinality) based on index statistics.
|
||||
* This API may change as point values evolves.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
final class MatchingPoints {
|
||||
/** bitset we collect into */
|
||||
private final BitSet bits;
|
||||
/** number of documents containing a value for the points field */
|
||||
private final int docCount;
|
||||
/** number of values indexed for the points field */
|
||||
private final long numPoints;
|
||||
/** number of documents in the index segment */
|
||||
private final int maxDoc;
|
||||
/** counter of hits seen */
|
||||
private long counter;
|
||||
|
||||
/**
|
||||
* Creates a new accumulator.
|
||||
* @param reader reader to collect point matches from
|
||||
* @param field field name.
|
||||
*/
|
||||
public MatchingPoints(LeafReader reader, String field) {
|
||||
maxDoc = reader.maxDoc();
|
||||
PointValues values = reader.getPointValues();
|
||||
if (values == null) {
|
||||
throw new IllegalStateException("the query is missing null checks");
|
||||
}
|
||||
docCount = values.getDocCount(field);
|
||||
numPoints = values.size(field);
|
||||
// heuristic: if the field is really sparse, use a sparse impl
|
||||
if (docCount >= 0 && docCount * 100L < maxDoc) {
|
||||
bits = new SparseFixedBitSet(maxDoc);
|
||||
} else {
|
||||
bits = new FixedBitSet(maxDoc);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a matching docid.
|
||||
* <p>
|
||||
* NOTE: doc IDs do not need to be provided in any order.
|
||||
*/
|
||||
public void add(int doc) {
|
||||
bits.set(doc);
|
||||
counter++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an iterator over the recorded matches.
|
||||
*/
|
||||
public DocIdSetIterator iterator() {
|
||||
// if single-valued (docCount == numPoints), then this is exact
|
||||
// otherwise its approximate based on field stats
|
||||
return new BitSetIterator(bits, (long) (counter * (docCount / (double) numPoints)));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue