LUCENE-7099: use two-phase iteration in LatLonPoint.newDistanceQuery

This commit is contained in:
Robert Muir 2016-03-12 10:58:32 -05:00
parent 500fc15734
commit 85d268ee8b
4 changed files with 80 additions and 13 deletions

View File

@ -15,6 +15,9 @@ Optimizations
* LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10% * LUCENE-7071: Reduce bytes copying in OfflineSorter, giving ~10%
speedup on merging 2D LatLonPoint values (Mike McCandless) speedup on merging 2D LatLonPoint values (Mike McCandless)
* LUCENE-7099: LatLonPoint's newDistanceQuery supports two-phase
iteration. (Robert Muir)
Other Other
* LUCENE-7087: Let MemoryIndex#fromDocument(...) accept 'Iterable<? extends IndexableField>' * LUCENE-7087: Let MemoryIndex#fromDocument(...) accept 'Iterable<? extends IndexableField>'

View File

@ -96,8 +96,10 @@ public class LatLonPoint extends Field {
} }
private static final int BITS = 32; private static final int BITS = 32;
private static final double LONGITUDE_SCALE = (0x1L<<BITS)/360.0D; private static final double LONGITUDE_ENCODE = (0x1L<<BITS)/360.0D;
private static final double LATITUDE_SCALE = (0x1L<<BITS)/180.0D; private static final double LONGITUDE_DECODE = 1/LONGITUDE_ENCODE;
private static final double LATITUDE_ENCODE = (0x1L<<BITS)/180.0D;
private static final double LATITUDE_DECODE = 1/LATITUDE_ENCODE;
@Override @Override
public String toString() { public String toString() {
@ -142,7 +144,7 @@ public class LatLonPoint extends Field {
if (latitude == 90.0D) { if (latitude == 90.0D) {
latitude = Math.nextDown(latitude); latitude = Math.nextDown(latitude);
} }
return Math.toIntExact((long) (latitude * LATITUDE_SCALE)); return Math.toIntExact((long) (latitude * LATITUDE_ENCODE));
} }
/** /**
@ -159,7 +161,7 @@ public class LatLonPoint extends Field {
if (longitude == 180.0D) { if (longitude == 180.0D) {
longitude = Math.nextDown(longitude); longitude = Math.nextDown(longitude);
} }
return Math.toIntExact((long) (longitude * LONGITUDE_SCALE)); return Math.toIntExact((long) (longitude * LONGITUDE_ENCODE));
} }
/** /**
@ -168,7 +170,7 @@ public class LatLonPoint extends Field {
* @return decoded latitude value. * @return decoded latitude value.
*/ */
public static double decodeLatitude(int encoded) { public static double decodeLatitude(int encoded) {
double result = encoded / LATITUDE_SCALE; double result = encoded * LATITUDE_DECODE;
assert GeoUtils.isValidLat(result); assert GeoUtils.isValidLat(result);
return result; return result;
} }
@ -189,7 +191,7 @@ public class LatLonPoint extends Field {
* @return decoded longitude value. * @return decoded longitude value.
*/ */
public static double decodeLongitude(int encoded) { public static double decodeLongitude(int encoded) {
double result = encoded / LONGITUDE_SCALE; double result = encoded * LONGITUDE_DECODE;
assert GeoUtils.isValidLon(result); assert GeoUtils.isValidLon(result);
return result; return result;
} }

View File

@ -18,22 +18,30 @@ package org.apache.lucene.document;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues; import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight; import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.spatial.util.GeoDistanceUtils; import org.apache.lucene.spatial.util.GeoDistanceUtils;
import org.apache.lucene.spatial.util.GeoRect; import org.apache.lucene.spatial.util.GeoRect;
import org.apache.lucene.spatial.util.GeoUtils; import org.apache.lucene.spatial.util.GeoUtils;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.DocIdSetBuilder; import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.SparseFixedBitSet;
/** /**
* Distance query for {@link LatLonPoint}. * Distance query for {@link LatLonPoint}.
@ -95,22 +103,32 @@ final class LatLonPointDistanceQuery extends Query {
} }
LatLonPoint.checkCompatible(fieldInfo); LatLonPoint.checkCompatible(fieldInfo);
// approximation (postfiltering has not yet been applied)
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc()); DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
// subset of documents that need no postfiltering, this is purely an optimization
final BitSet preApproved;
// dumb heuristic: if the field is really sparse, use a sparse impl
if (values.getDocCount(field) * 100L < reader.maxDoc()) {
preApproved = new SparseFixedBitSet(reader.maxDoc());
} else {
preApproved = new FixedBitSet(reader.maxDoc());
}
values.intersect(field, values.intersect(field,
new IntersectVisitor() { new IntersectVisitor() {
@Override
public void grow(int count) {
result.grow(count);
}
@Override @Override
public void visit(int docID) { public void visit(int docID) {
result.add(docID); result.add(docID);
preApproved.set(docID);
} }
@Override @Override
public void visit(int docID, byte[] packedValue) { public void visit(int docID, byte[] packedValue) {
assert packedValue.length == 8; result.add(docID);
double lat = LatLonPoint.decodeLatitude(packedValue, 0);
double lon = LatLonPoint.decodeLongitude(packedValue, Integer.BYTES);
if (GeoDistanceUtils.haversin(latitude, longitude, lat, lon) <= radiusMeters) {
visit(docID);
}
} }
// algorithm: we create a bounding box (two bounding boxes if we cross the dateline). // algorithm: we create a bounding box (two bounding boxes if we cross the dateline).
@ -142,7 +160,41 @@ final class LatLonPointDistanceQuery extends Query {
} }
}); });
return new ConstantScoreScorer(this, score(), result.build().iterator()); DocIdSet set = result.build();
final DocIdSetIterator disi = set.iterator();
if (disi == null) {
return null;
}
// return two-phase iterator using docvalues to postfilter candidates
SortedNumericDocValues docValues = DocValues.getSortedNumeric(reader, field);
TwoPhaseIterator iterator = new TwoPhaseIterator(disi) {
@Override
public boolean matches() throws IOException {
int docId = disi.docID();
if (preApproved.get(docId)) {
return true;
} else {
docValues.setDocument(docId);
int count = docValues.count();
for (int i = 0; i < count; i++) {
long encoded = docValues.valueAt(i);
double docLatitude = LatLonPoint.decodeLatitude((int)(encoded >> 32));
double docLongitude = LatLonPoint.decodeLongitude((int)(encoded & 0xFFFFFFFF));
if (GeoDistanceUtils.haversin(latitude, longitude, docLatitude, docLongitude) <= radiusMeters) {
return true;
}
}
return false;
}
}
@Override
public float matchCost() {
return 20; // TODO: make this fancier
}
};
return new ConstantScoreScorer(this, score(), iterator);
} }
}; };
} }

View File

@ -159,6 +159,16 @@ public class TestLatLonPoint extends LuceneTestCase {
assertEquals(-180.0, LatLonPoint.decodeLongitude(LatLonPoint.encodeLongitude(-180.0)), ENCODING_TOLERANCE); assertEquals(-180.0, LatLonPoint.decodeLongitude(LatLonPoint.encodeLongitude(-180.0)), ENCODING_TOLERANCE);
} }
public void testEncodeDecodeExtremeValues() throws Exception {
assertEquals(Integer.MIN_VALUE, LatLonPoint.encodeLatitude(-90.0));
assertEquals(0, LatLonPoint.encodeLatitude(0.0));
assertEquals(Integer.MAX_VALUE, LatLonPoint.encodeLatitude(90.0));
assertEquals(Integer.MIN_VALUE, LatLonPoint.encodeLongitude(-180.0));
assertEquals(0, LatLonPoint.encodeLatitude(0.0));
assertEquals(Integer.MAX_VALUE, LatLonPoint.encodeLongitude(180.0));
}
public void testEncodeDecodeIsStable() throws Exception { public void testEncodeDecodeIsStable() throws Exception {
int iters = atLeast(1000); int iters = atLeast(1000);
for(int iter=0;iter<iters;iter++) { for(int iter=0;iter<iters;iter++) {