mirror of https://github.com/apache/lucene.git
LUCENE-7109: LatLonPoint.newPolygonQuery should use two-phase iterator
This commit is contained in:
parent
0f78235b94
commit
6ea458a0ed
|
@ -23,6 +23,9 @@ Optimizations
|
|||
|
||||
* LUCENE-7105: Optimize LatLonPoint's newDistanceQuery. (Robert Muir)
|
||||
|
||||
* LUCENE-7109: LatLonPoint's newPolygonQuery supports two-phase
|
||||
iteration. (Robert Muir)
|
||||
|
||||
* LUCENE-7097: IntroSorter now recurses to 2 * log_2(count) quicksort
|
||||
stack depth before switching to heapsort (Adrien Grand, Mike McCandless)
|
||||
|
||||
|
|
|
@ -23,15 +23,23 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
|
|||
import org.apache.lucene.index.PointValues.Relation;
|
||||
import org.apache.lucene.search.ConstantScoreScorer;
|
||||
import org.apache.lucene.search.ConstantScoreWeight;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.util.BitSet;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.SparseFixedBitSet;
|
||||
import org.apache.lucene.spatial.util.GeoRelationUtils;
|
||||
import org.apache.lucene.spatial.util.GeoUtils;
|
||||
|
||||
|
@ -110,9 +118,6 @@ final class LatLonPointInPolygonQuery extends Query {
|
|||
// I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
|
||||
// used in the first pass:
|
||||
|
||||
// TODO: except that the polygon verify is costly! The approximation should be all docs in all overlapping cells, and matches() should
|
||||
// then check the polygon
|
||||
|
||||
return new ConstantScoreWeight(this) {
|
||||
|
||||
@Override
|
||||
|
@ -130,22 +135,28 @@ final class LatLonPointInPolygonQuery extends Query {
|
|||
}
|
||||
LatLonPoint.checkCompatible(fieldInfo);
|
||||
|
||||
// approximation (postfiltering has not yet been applied)
|
||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
||||
// subset of documents that need no postfiltering, this is purely an optimization
|
||||
final BitSet preApproved;
|
||||
// dumb heuristic: if the field is really sparse, use a sparse impl
|
||||
if (values.getDocCount(field) * 100L < reader.maxDoc()) {
|
||||
preApproved = new SparseFixedBitSet(reader.maxDoc());
|
||||
} else {
|
||||
preApproved = new FixedBitSet(reader.maxDoc());
|
||||
}
|
||||
values.intersect(field,
|
||||
new IntersectVisitor() {
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
result.add(docID);
|
||||
preApproved.set(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
assert packedValue.length == 8;
|
||||
double lat = LatLonPoint.decodeLatitude(packedValue, 0);
|
||||
double lon = LatLonPoint.decodeLongitude(packedValue, Integer.BYTES);
|
||||
if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, lat, lon)) {
|
||||
result.add(docID);
|
||||
}
|
||||
// TODO: range checks
|
||||
result.add(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -172,7 +183,41 @@ final class LatLonPointInPolygonQuery extends Query {
|
|||
}
|
||||
});
|
||||
|
||||
return new ConstantScoreScorer(this, score(), result.build().iterator());
|
||||
DocIdSet set = result.build();
|
||||
final DocIdSetIterator disi = set.iterator();
|
||||
if (disi == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// return two-phase iterator using docvalues to postfilter candidates
|
||||
SortedNumericDocValues docValues = DocValues.getSortedNumeric(reader, field);
|
||||
TwoPhaseIterator iterator = new TwoPhaseIterator(disi) {
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
int docId = disi.docID();
|
||||
if (preApproved.get(docId)) {
|
||||
return true;
|
||||
} else {
|
||||
docValues.setDocument(docId);
|
||||
int count = docValues.count();
|
||||
for (int i = 0; i < count; i++) {
|
||||
long encoded = docValues.valueAt(i);
|
||||
double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
|
||||
double docLongitude = LatLonPoint.decodeLongitude((int)(encoded & 0xFFFFFFFF));
|
||||
if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, docLatitude, docLongitude)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 20 * polyLons.length; // TODO: make this fancier, but currently linear with number of vertices
|
||||
}
|
||||
};
|
||||
return new ConstantScoreScorer(this, score(), iterator);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.document;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/** Simple tests for {@link LatLonPoint#newPolygonQuery} */
|
||||
public class TestLatLonPointInPolygonQuery extends LuceneTestCase {
|
||||
|
||||
/** test we can search for a polygon */
|
||||
public void testBasics() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
|
||||
|
||||
// add a doc with a point
|
||||
Document document = new Document();
|
||||
document.add(new LatLonPoint("field", 18.313694, -65.227444));
|
||||
writer.addDocument(document);
|
||||
|
||||
// search and verify we found our doc
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
assertEquals(1, searcher.count(LatLonPoint.newPolygonQuery("field",
|
||||
new double[] { 18, 18, 19, 19, 18 },
|
||||
new double[] { -66, -65, -65, -66, -66 })));
|
||||
|
||||
reader.close();
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue