LUCENE-7109: LatLonPoint.newPolygonQuery should use two-phase iterator

This commit is contained in:
Robert Muir 2016-03-16 09:25:06 -04:00
parent 0f78235b94
commit 6ea458a0ed
3 changed files with 107 additions and 10 deletions

View File

@ -23,6 +23,9 @@ Optimizations
* LUCENE-7105: Optimize LatLonPoint's newDistanceQuery. (Robert Muir)
* LUCENE-7109: LatLonPoint's newPolygonQuery supports two-phase
iteration. (Robert Muir)
* LUCENE-7097: IntroSorter now recurses to 2 * log_2(count) quicksort
stack depth before switching to heapsort (Adrien Grand, Mike McCandless)

View File

@ -23,15 +23,23 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.SparseFixedBitSet;
import org.apache.lucene.spatial.util.GeoRelationUtils;
import org.apache.lucene.spatial.util.GeoUtils;
@ -110,9 +118,6 @@ final class LatLonPointInPolygonQuery extends Query {
// I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
// used in the first pass:
// TODO: except that the polygon verify is costly! The approximation should be all docs in all overlapping cells, and matches() should
// then check the polygon
return new ConstantScoreWeight(this) {
@Override
@ -130,22 +135,28 @@ final class LatLonPointInPolygonQuery extends Query {
}
LatLonPoint.checkCompatible(fieldInfo);
// approximation (postfiltering has not yet been applied)
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
// subset of documents that need no postfiltering, this is purely an optimization
final BitSet preApproved;
// dumb heuristic: if the field is really sparse, use a sparse impl
if (values.getDocCount(field) * 100L < reader.maxDoc()) {
preApproved = new SparseFixedBitSet(reader.maxDoc());
} else {
preApproved = new FixedBitSet(reader.maxDoc());
}
values.intersect(field,
new IntersectVisitor() {
@Override
public void visit(int docID) {
result.add(docID);
preApproved.set(docID);
}
@Override
public void visit(int docID, byte[] packedValue) {
assert packedValue.length == 8;
double lat = LatLonPoint.decodeLatitude(packedValue, 0);
double lon = LatLonPoint.decodeLongitude(packedValue, Integer.BYTES);
if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, lat, lon)) {
result.add(docID);
}
// TODO: range checks
result.add(docID);
}
@Override
@ -172,7 +183,41 @@ final class LatLonPointInPolygonQuery extends Query {
}
});
return new ConstantScoreScorer(this, score(), result.build().iterator());
DocIdSet set = result.build();
final DocIdSetIterator disi = set.iterator();
if (disi == null) {
return null;
}
// return two-phase iterator using docvalues to postfilter candidates
SortedNumericDocValues docValues = DocValues.getSortedNumeric(reader, field);
TwoPhaseIterator iterator = new TwoPhaseIterator(disi) {
@Override
public boolean matches() throws IOException {
int docId = disi.docID();
if (preApproved.get(docId)) {
return true;
} else {
docValues.setDocument(docId);
int count = docValues.count();
for (int i = 0; i < count; i++) {
long encoded = docValues.valueAt(i);
double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
double docLongitude = LatLonPoint.decodeLongitude((int)(encoded & 0xFFFFFFFF));
if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, docLatitude, docLongitude)) {
return true;
}
}
return false;
}
}
@Override
public float matchCost() {
return 20 * polyLons.length; // TODO: make this fancier, but currently linear with number of vertices
}
};
return new ConstantScoreScorer(this, score(), iterator);
}
};
}

View File

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
/** Simple tests for {@link LatLonPoint#newPolygonQuery} */
public class TestLatLonPointInPolygonQuery extends LuceneTestCase {
/** test we can search for a polygon */
public void testBasics() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
// add a doc with a point
Document document = new Document();
document.add(new LatLonPoint("field", 18.313694, -65.227444));
writer.addDocument(document);
// search and verify we found our doc
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
assertEquals(1, searcher.count(LatLonPoint.newPolygonQuery("field",
new double[] { 18, 18, 19, 19, 18 },
new double[] { -66, -65, -65, -66, -66 })));
reader.close();
writer.close();
dir.close();
}
}