LUCENE-7109: LatLonPoint.newPolygonQuery should use two-phase iterator
commit e68dc4a330
parent 4deb4cd1ba
CHANGES.txt
@@ -20,6 +20,9 @@ Optimizations
 
 * LUCENE-7105: Optimize LatLonPoint's newDistanceQuery. (Robert Muir)
 
+* LUCENE-7109: LatLonPoint's newPolygonQuery supports two-phase
+  iteration. (Robert Muir)
+
 * LUCENE-7097: IntroSorter now recurses to 2 * log_2(count) quicksort
   stack depth before switching to heapsort (Adrien Grand, Mike McCandless)
 
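The new CHANGES entry refers to Lucene's two-phase iteration protocol: instead of only exposing an iterator whose every doc is a confirmed hit, a scorer may expose a cheap approximation plus a matches() check that confirms or rejects each candidate. A minimal sketch of how a caller consumes such a scorer; the TwoPhaseConsumer class and countMatches helper are illustrative and not part of this commit:

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;

/** Sketch: drives a scorer, preferring its two-phase view when one is exposed. */
final class TwoPhaseConsumer {

  /** Counts matching documents, confirming approximated hits with matches(). */
  static int countMatches(Scorer scorer) throws IOException {
    int count = 0;
    TwoPhaseIterator twoPhase = scorer.twoPhaseIterator();
    if (twoPhase == null) {
      // No approximation: every doc returned by the iterator is a true match.
      DocIdSetIterator disi = scorer.iterator();
      while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        count++;
      }
    } else {
      // The approximation may contain false positives; matches() post-filters them.
      DocIdSetIterator approximation = twoPhase.approximation();
      while (approximation.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        if (twoPhase.matches()) {
          count++;
        }
      }
    }
    return count;
  }
}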
LatLonPointInPolygonQuery.java
@@ -23,15 +23,23 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.index.PointValues.Relation;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.SparseFixedBitSet;
 import org.apache.lucene.spatial.util.GeoRelationUtils;
 import org.apache.lucene.spatial.util.GeoUtils;
 
@@ -110,9 +118,6 @@ final class LatLonPointInPolygonQuery extends Query {
     // I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
     // used in the first pass:
 
-    // TODO: except that the polygon verify is costly! The approximation should be all docs in all overlapping cells, and matches() should
-    // then check the polygon
-
     return new ConstantScoreWeight(this) {
 
       @Override
@@ -130,22 +135,28 @@ final class LatLonPointInPolygonQuery extends Query {
         }
         LatLonPoint.checkCompatible(fieldInfo);
 
+        // approximation (postfiltering has not yet been applied)
         DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
+        // subset of documents that need no postfiltering, this is purely an optimization
+        final BitSet preApproved;
+        // dumb heuristic: if the field is really sparse, use a sparse impl
+        if (values.getDocCount(field) * 100L < reader.maxDoc()) {
+          preApproved = new SparseFixedBitSet(reader.maxDoc());
+        } else {
+          preApproved = new FixedBitSet(reader.maxDoc());
+        }
         values.intersect(field,
                          new IntersectVisitor() {
                            @Override
                            public void visit(int docID) {
                              result.add(docID);
+                             preApproved.set(docID);
                            }
 
                            @Override
                            public void visit(int docID, byte[] packedValue) {
-                             assert packedValue.length == 8;
-                             double lat = LatLonPoint.decodeLatitude(packedValue, 0);
-                             double lon = LatLonPoint.decodeLongitude(packedValue, Integer.BYTES);
-                             if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, lat, lon)) {
-                               result.add(docID);
-                             }
+                             // TODO: range checks
+                             result.add(docID);
                            }
 
                            @Override
@@ -172,7 +183,41 @@ final class LatLonPointInPolygonQuery extends Query {
                            }
                          });
 
-        return new ConstantScoreScorer(this, score(), result.build().iterator());
+        DocIdSet set = result.build();
+        final DocIdSetIterator disi = set.iterator();
+        if (disi == null) {
+          return null;
+        }
+
+        // return two-phase iterator using docvalues to postfilter candidates
+        SortedNumericDocValues docValues = DocValues.getSortedNumeric(reader, field);
+        TwoPhaseIterator iterator = new TwoPhaseIterator(disi) {
+          @Override
+          public boolean matches() throws IOException {
+            int docId = disi.docID();
+            if (preApproved.get(docId)) {
+              return true;
+            } else {
+              docValues.setDocument(docId);
+              int count = docValues.count();
+              for (int i = 0; i < count; i++) {
+                long encoded = docValues.valueAt(i);
+                double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
+                double docLongitude = LatLonPoint.decodeLongitude((int)(encoded & 0xFFFFFFFF));
+                if (GeoRelationUtils.pointInPolygon(polyLons, polyLats, docLatitude, docLongitude)) {
+                  return true;
+                }
+              }
+              return false;
+            }
+          }
+
+          @Override
+          public float matchCost() {
+            return 20 * polyLons.length; // TODO: make this fancier, but currently linear with number of vertices
+          }
+        };
+        return new ConstantScoreScorer(this, score(), iterator);
       }
     };
   }
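The matches() post-filter above splits each sorted-numeric doc value into two 32-bit halves before decoding. A small sketch of the packing this implies, with the upper 32 bits holding the encoded latitude and the lower 32 bits the encoded longitude; the PackedLatLonSketch class and its method names are illustrative, not Lucene API:

/** Sketch of the long layout assumed by the two-phase matches() check. */
final class PackedLatLonSketch {

  /** Packs two already-encoded 32-bit values into one doc-values long. */
  static long pack(int encodedLatitude, int encodedLongitude) {
    return (((long) encodedLatitude) << 32) | (encodedLongitude & 0xFFFFFFFFL);
  }

  /** Recovers the encoded latitude, mirroring (int)(encoded >> 32) in matches(). */
  static int latitudeBits(long packed) {
    return (int) (packed >> 32);
  }

  /** Recovers the encoded longitude, mirroring (int)(encoded & 0xFFFFFFFF) in matches(). */
  static int longitudeBits(long packed) {
    return (int) (packed & 0xFFFFFFFFL);
  }
}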
TestLatLonPointInPolygonQuery.java (new file)
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/** Simple tests for {@link LatLonPoint#newPolygonQuery} */
+public class TestLatLonPointInPolygonQuery extends LuceneTestCase {
+
+  /** test we can search for a polygon */
+  public void testBasics() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+
+    // add a doc with a point
+    Document document = new Document();
+    document.add(new LatLonPoint("field", 18.313694, -65.227444));
+    writer.addDocument(document);
+
+    // search and verify we found our doc
+    IndexReader reader = writer.getReader();
+    IndexSearcher searcher = newSearcher(reader);
+    assertEquals(1, searcher.count(LatLonPoint.newPolygonQuery("field",
+                                                   new double[] { 18, 18, 19, 19, 18 },
+                                                   new double[] { -66, -65, -65, -66, -66 })));
+
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+}
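The matchCost() estimate in the query above is linear in the polygon's vertex count because verifying one candidate walks every polygon edge. A generic crossing-number (ray-casting) sketch of that kind of check; this is only an assumption about its shape, not GeoRelationUtils.pointInPolygon's actual implementation:

/** Sketch: crossing-number point-in-polygon test, O(number of vertices) per point. */
final class PointInPolygonSketch {

  /** Returns true if (lat, lon) falls inside the polygon given as parallel vertex arrays. */
  static boolean contains(double[] polyLats, double[] polyLons, double lat, double lon) {
    boolean inside = false;
    // Walk each edge (j, i); toggle on every edge a horizontal ray from the point crosses.
    for (int i = 0, j = polyLats.length - 1; i < polyLats.length; j = i++) {
      if ((polyLats[i] > lat) != (polyLats[j] > lat)
          && lon < (polyLons[j] - polyLons[i]) * (lat - polyLats[i]) / (polyLats[j] - polyLats[i]) + polyLons[i]) {
        inside = !inside;
      }
    }
    return inside;
  }
}

For the square used in testBasics (latitudes 18 to 19, longitudes -66 to -65), the indexed point (18.313694, -65.227444) crosses an odd number of edges on such a ray, so it counts as inside and the test expects exactly one hit.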