mirror of https://github.com/apache/lucene.git
LUCENE-7069: add LatLonPoint.nearest to find N nearest points
This commit is contained in:
parent
feee5e1400
commit
5bbb5e7748
|
@ -13,6 +13,9 @@ New Features
|
|||
* LUCENE-7140: Add PlanetModel.bisection to spatial3d (Karl Wright via
|
||||
Mike McCandless)
|
||||
|
||||
* LUCENE-7069: Add LatLonPoint.nearest, to find nearest N points to a
|
||||
provided query point (Mike McCandless)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-7163: refactor GeoRect, Polygon, and GeoUtils tests to geo
|
||||
|
|
|
@ -114,7 +114,10 @@ public class Lucene60PointsReader extends PointsReader implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
private BKDReader getBKDReader(String fieldName) {
|
||||
/** Returns the underlying {@link BKDReader}.
|
||||
*
|
||||
* @lucene.internal */
|
||||
public BKDReader getBKDReader(String fieldName) {
|
||||
FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName);
|
||||
if (fieldInfo == null) {
|
||||
throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized");
|
||||
|
|
|
@ -205,12 +205,7 @@ public class BKDReader implements Accountable {
|
|||
byte[] rootMinPacked = new byte[packedBytesLength];
|
||||
byte[] rootMaxPacked = new byte[packedBytesLength];
|
||||
Arrays.fill(rootMaxPacked, (byte) 0xff);
|
||||
|
||||
IntersectState state = new IntersectState(in.clone(), numDims, packedBytesLength,
|
||||
maxPointsInLeafNode,
|
||||
new VerifyVisitor(numDims, bytesPerDim, maxDoc));
|
||||
|
||||
verify(state, 1, rootMinPacked, rootMaxPacked);
|
||||
verify(getIntersectState(new VerifyVisitor(numDims, bytesPerDim, maxDoc)), 1, rootMinPacked, rootMaxPacked);
|
||||
}
|
||||
|
||||
private void verify(IntersectState state, int nodeID, byte[] cellMinPacked, byte[] cellMaxPacked) throws IOException {
|
||||
|
@ -258,7 +253,8 @@ public class BKDReader implements Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
static final class IntersectState {
|
||||
/** Used to track all state for a single call to {@link #intersect}. */
|
||||
public static final class IntersectState {
|
||||
final IndexInput in;
|
||||
final int[] scratchDocIDs;
|
||||
final byte[] scratchPackedValue;
|
||||
|
@ -279,11 +275,7 @@ public class BKDReader implements Accountable {
|
|||
}
|
||||
|
||||
public void intersect(IntersectVisitor visitor) throws IOException {
|
||||
IntersectState state = new IntersectState(in.clone(), numDims,
|
||||
packedBytesLength,
|
||||
maxPointsInLeafNode,
|
||||
visitor);
|
||||
intersect(state, 1, minPackedValue, maxPackedValue);
|
||||
intersect(getIntersectState(visitor), 1, minPackedValue, maxPackedValue);
|
||||
}
|
||||
|
||||
/** Fast path: this is called when the query box fully encompasses all cells under this node. */
|
||||
|
@ -300,6 +292,25 @@ public class BKDReader implements Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
/** Create a new {@link IntersectState} */
|
||||
public IntersectState getIntersectState(IntersectVisitor visitor) {
|
||||
return new IntersectState(in.clone(), numDims,
|
||||
packedBytesLength,
|
||||
maxPointsInLeafNode,
|
||||
visitor);
|
||||
}
|
||||
|
||||
/** Visits all docIDs and packed values in a single leaf block */
|
||||
public void visitLeafBlockValues(int nodeID, IntersectState state) throws IOException {
|
||||
int leafID = nodeID - leafNodeOffset;
|
||||
|
||||
// Leaf node; scan and filter all points in this block:
|
||||
int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs);
|
||||
|
||||
// Again, this time reading values and checking with the visitor
|
||||
visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor);
|
||||
}
|
||||
|
||||
protected void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException {
|
||||
// Leaf node
|
||||
in.seek(blockFP);
|
||||
|
@ -414,6 +425,14 @@ public class BKDReader implements Accountable {
|
|||
}
|
||||
}
|
||||
|
||||
/** Copies the split value for this node into the provided byte array */
|
||||
public void copySplitValue(int nodeID, byte[] splitPackedValue) {
|
||||
int address = nodeID * (bytesPerDim+1);
|
||||
int splitDim = splitPackedValues[address] & 0xff;
|
||||
assert splitDim < numDims;
|
||||
System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return splitPackedValues.length +
|
||||
|
@ -443,4 +462,8 @@ public class BKDReader implements Accountable {
|
|||
public int getDocCount() {
|
||||
return docCount;
|
||||
}
|
||||
|
||||
public boolean isLeafNode(int nodeID) {
|
||||
return nodeID >= leafNodeOffset;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,20 +16,33 @@
|
|||
*/
|
||||
package org.apache.lucene.document;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.codecs.lucene60.Lucene60PointsFormat;
|
||||
import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
|
||||
import org.apache.lucene.geo.Polygon;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.PointRangeQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.geo.Polygon;
|
||||
import org.apache.lucene.search.TopFieldDocs;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.lucene.util.bkd.BKDReader;
|
||||
|
||||
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude;
|
||||
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude;
|
||||
|
@ -289,4 +302,42 @@ public class LatLonPoint extends Field {
|
|||
public static SortField newDistanceSort(String field, double latitude, double longitude) {
|
||||
return new LatLonPointSortField(field, latitude, longitude);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the {@code topN} nearest indexed points to the provided point, according to Haversine distance.
|
||||
* This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link #newDistanceSort},
|
||||
* but is far more efficient since it takes advantage of properties the indexed BKD tree. Currently this
|
||||
* only works with {@link Lucene60PointsFormat} (used by the default codec).
|
||||
*/
|
||||
public static TopFieldDocs nearest(IndexSearcher s, String fieldName, double latitude, double longitude, int n) throws IOException {
|
||||
List<BKDReader> readers = new ArrayList<>();
|
||||
List<Integer> docBases = new ArrayList<>();
|
||||
List<Bits> liveDocs = new ArrayList<>();
|
||||
int totalHits = 0;
|
||||
for(LeafReaderContext leaf : s.getIndexReader().leaves()) {
|
||||
PointValues points = leaf.reader().getPointValues();
|
||||
if (points != null) {
|
||||
if (points instanceof Lucene60PointsReader == false) {
|
||||
throw new IllegalArgumentException("can only run on Lucene60PointsReader points implementation, but got " + points);
|
||||
}
|
||||
totalHits += points.getDocCount(fieldName);
|
||||
BKDReader reader = ((Lucene60PointsReader) points).getBKDReader(fieldName);
|
||||
if (reader != null) {
|
||||
readers.add(reader);
|
||||
docBases.add(leaf.docBase);
|
||||
liveDocs.add(leaf.reader().getLiveDocs());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
NearestNeighbor.NearestHit[] hits = NearestNeighbor.nearest(latitude, longitude, readers, liveDocs, docBases, n);
|
||||
|
||||
// Convert to TopFieldDocs:
|
||||
ScoreDoc[] scoreDocs = new ScoreDoc[hits.length];
|
||||
for(int i=0;i<hits.length;i++) {
|
||||
NearestNeighbor.NearestHit hit = hits[i];
|
||||
scoreDocs[i] = new FieldDoc(hit.docID, 0.0f, new Object[] {Double.valueOf(hit.distanceMeters)});
|
||||
}
|
||||
return new TopFieldDocs(totalHits, scoreDocs, null, 0.0f);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,316 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.document;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.PriorityQueue;
|
||||
|
||||
import org.apache.lucene.geo.Rectangle;
|
||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||
import org.apache.lucene.index.PointValues.Relation;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.SloppyMath;
|
||||
import org.apache.lucene.util.bkd.BKDReader;
|
||||
|
||||
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude;
|
||||
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude;
|
||||
|
||||
/**
|
||||
* KNN search on top of 2D lat/lon indexed points.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
class NearestNeighbor {
|
||||
|
||||
static class Cell implements Comparable<Cell> {
|
||||
final int readerIndex;
|
||||
final int nodeID;
|
||||
final byte[] minPacked;
|
||||
final byte[] maxPacked;
|
||||
|
||||
/** The closest possible distance of all points in this cell */
|
||||
final double distanceMeters;
|
||||
|
||||
public Cell(int readerIndex, int nodeID, byte[] minPacked, byte[] maxPacked, double distanceMeters) {
|
||||
this.readerIndex = readerIndex;
|
||||
this.nodeID = nodeID;
|
||||
this.minPacked = minPacked.clone();
|
||||
this.maxPacked = maxPacked.clone();
|
||||
this.distanceMeters = distanceMeters;
|
||||
}
|
||||
|
||||
public int compareTo(Cell other) {
|
||||
return Double.compare(distanceMeters, other.distanceMeters);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
double minLat = decodeLatitude(minPacked, 0);
|
||||
double minLon = decodeLongitude(minPacked, Integer.BYTES);
|
||||
double maxLat = decodeLatitude(maxPacked, 0);
|
||||
double maxLon = decodeLongitude(maxPacked, Integer.BYTES);
|
||||
return "Cell(readerIndex=" + readerIndex + " lat=" + minLat + " TO " + maxLat + ", lon=" + minLon + " TO " + maxLon + ")";
|
||||
}
|
||||
}
|
||||
|
||||
private static class NearestVisitor implements IntersectVisitor {
|
||||
|
||||
public int curDocBase;
|
||||
public Bits curLiveDocs;
|
||||
final int topN;
|
||||
final PriorityQueue<NearestHit> hitQueue;
|
||||
final double pointLat;
|
||||
final double pointLon;
|
||||
private int setBottomCounter;
|
||||
|
||||
private double minLon = Double.NEGATIVE_INFINITY;
|
||||
private double maxLon = Double.POSITIVE_INFINITY;
|
||||
private double minLat = Double.NEGATIVE_INFINITY;
|
||||
private double maxLat = Double.POSITIVE_INFINITY;
|
||||
|
||||
// second set of longitude ranges to check (for cross-dateline case)
|
||||
private double minLon2 = Double.POSITIVE_INFINITY;
|
||||
|
||||
int pointCheckCount;
|
||||
|
||||
public NearestVisitor(PriorityQueue<NearestHit> hitQueue, int topN, double pointLat, double pointLon) {
|
||||
this.hitQueue = hitQueue;
|
||||
this.topN = topN;
|
||||
this.pointLat = pointLat;
|
||||
this.pointLon = pointLon;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
||||
private void maybeUpdateBBox() {
|
||||
if (setBottomCounter < 1024 || (setBottomCounter & 0x3F) == 0x3F) {
|
||||
NearestHit hit = hitQueue.peek();
|
||||
Rectangle box = Rectangle.fromPointDistance(pointLat, pointLon, hit.distanceMeters);
|
||||
minLat = box.minLat;
|
||||
maxLat = box.maxLat;
|
||||
if (box.crossesDateline()) {
|
||||
// box1
|
||||
minLon = Double.NEGATIVE_INFINITY;
|
||||
maxLon = box.maxLon;
|
||||
// box2
|
||||
minLon2 = box.minLon;
|
||||
} else {
|
||||
minLon = box.minLon;
|
||||
maxLon = box.maxLon;
|
||||
// disable box2
|
||||
minLon2 = Double.POSITIVE_INFINITY;
|
||||
}
|
||||
}
|
||||
setBottomCounter++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
//System.out.println("visit docID=" + docID + " liveDocs=" + curLiveDocs);
|
||||
|
||||
if (curLiveDocs != null && curLiveDocs.get(docID) == false) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: work in int space, use haversinSortKey
|
||||
|
||||
double docLatitude = decodeLatitude(packedValue, 0);
|
||||
double docLongitude = decodeLongitude(packedValue, Integer.BYTES);
|
||||
|
||||
// test bounding box
|
||||
if (docLatitude < minLat || docLatitude > maxLat) {
|
||||
return;
|
||||
}
|
||||
if ((docLongitude < minLon || docLongitude > maxLon) && (docLongitude < minLon2)) {
|
||||
return;
|
||||
}
|
||||
|
||||
pointCheckCount++;
|
||||
|
||||
double distanceMeters = SloppyMath.haversinMeters(pointLat, pointLon, docLatitude, docLongitude);
|
||||
|
||||
//System.out.println(" visit docID=" + docID + " distanceMeters=" + distanceMeters + " docLat=" + docLatitude + " docLon=" + docLongitude);
|
||||
|
||||
int fullDocID = curDocBase + docID;
|
||||
|
||||
if (hitQueue.size() == topN) {
|
||||
// queue already full
|
||||
NearestHit hit = hitQueue.peek();
|
||||
//System.out.println(" bottom distanceMeters=" + hit.distanceMeters);
|
||||
// we don't collect docs in order here, so we must also test the tie-break case ourselves:
|
||||
if (distanceMeters < hit.distanceMeters || (distanceMeters == hit.distanceMeters && fullDocID < hit.docID)) {
|
||||
hitQueue.poll();
|
||||
hit.docID = fullDocID;
|
||||
hit.distanceMeters = distanceMeters;
|
||||
hitQueue.offer(hit);
|
||||
//System.out.println(" ** keep2, now bottom=" + hit);
|
||||
maybeUpdateBBox();
|
||||
}
|
||||
|
||||
} else {
|
||||
NearestHit hit = new NearestHit();
|
||||
hit.docID = fullDocID;
|
||||
hit.distanceMeters = distanceMeters;
|
||||
hitQueue.offer(hit);
|
||||
//System.out.println(" ** keep1, now bottom=" + hit);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
throw new AssertionError();
|
||||
}
|
||||
}
|
||||
|
||||
/** Holds one hit from {@link LatLonPoint#nearest} */
|
||||
static class NearestHit {
|
||||
public int docID;
|
||||
public double distanceMeters;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NearestHit(docID=" + docID + " distanceMeters=" + distanceMeters + ")";
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: can we somehow share more with, or simply directly use, the LatLonPointDistanceComparator? It's really doing the same thing as
|
||||
// our hitQueue...
|
||||
|
||||
public static NearestHit[] nearest(double pointLat, double pointLon, List<BKDReader> readers, List<Bits> liveDocs, List<Integer> docBases, final int n) throws IOException {
|
||||
|
||||
//System.out.println("NEAREST: r=" + r + " liveDocs=" + liveDocs);
|
||||
// Holds closest collected points seen so far:
|
||||
// TODO: if we used lucene's PQ we could just updateTop instead of poll/offer:
|
||||
final PriorityQueue<NearestHit> hitQueue = new PriorityQueue<>(n, new Comparator<NearestHit>() {
|
||||
@Override
|
||||
public int compare(NearestHit a, NearestHit b) {
|
||||
// sort by opposite distanceMeters natural order
|
||||
int cmp = Double.compare(a.distanceMeters, b.distanceMeters);
|
||||
if (cmp != 0) {
|
||||
return -cmp;
|
||||
}
|
||||
|
||||
// tie-break by higher docID:
|
||||
return b.docID - a.docID;
|
||||
}
|
||||
});
|
||||
|
||||
// Holds all cells, sorted by closest to the point:
|
||||
PriorityQueue<Cell> cellQueue = new PriorityQueue<>();
|
||||
|
||||
NearestVisitor visitor = new NearestVisitor(hitQueue, n, pointLat, pointLon);
|
||||
List<BKDReader.IntersectState> states = new ArrayList<>();
|
||||
|
||||
// Add root cell for each reader into the queue:
|
||||
for(int i=0;i<readers.size();i++) {
|
||||
BKDReader reader = readers.get(i);
|
||||
byte[] minPackedValue = reader.getMinPackedValue();
|
||||
double minLat = decodeLatitude(minPackedValue, 0);
|
||||
double minLon = decodeLongitude(minPackedValue, Integer.BYTES);
|
||||
|
||||
byte[] maxPackedValue = reader.getMaxPackedValue();
|
||||
double maxLat = decodeLatitude(maxPackedValue, 0);
|
||||
double maxLon = decodeLongitude(maxPackedValue, Integer.BYTES);
|
||||
states.add(reader.getIntersectState(visitor));
|
||||
|
||||
cellQueue.offer(new Cell(i, 1, reader.getMinPackedValue(), reader.getMaxPackedValue(),
|
||||
approxBestDistance(minLat, maxLat, minLon, maxLon, pointLat, pointLon)));
|
||||
}
|
||||
|
||||
while (cellQueue.size() > 0) {
|
||||
Cell cell = cellQueue.poll();
|
||||
//System.out.println(" visit " + cell);
|
||||
|
||||
// TODO: if we replace approxBestDistance with actualBestDistance, we can put an opto here to break once this "best" cell is fully outside of the hitQueue bottom's radius:
|
||||
BKDReader reader = readers.get(cell.readerIndex);
|
||||
|
||||
if (reader.isLeafNode(cell.nodeID)) {
|
||||
//System.out.println(" leaf");
|
||||
// Leaf block: visit all points and possibly collect them:
|
||||
visitor.curDocBase = docBases.get(cell.readerIndex);
|
||||
visitor.curLiveDocs = liveDocs.get(cell.readerIndex);
|
||||
reader.visitLeafBlockValues(cell.nodeID, states.get(cell.readerIndex));
|
||||
//System.out.println(" now " + hitQueue.size() + " hits");
|
||||
} else {
|
||||
//System.out.println(" non-leaf");
|
||||
// Non-leaf block: split into two cells and put them back into the queue:
|
||||
|
||||
double cellMinLat = decodeLatitude(cell.minPacked, 0);
|
||||
double cellMinLon = decodeLongitude(cell.minPacked, Integer.BYTES);
|
||||
double cellMaxLat = decodeLatitude(cell.maxPacked, 0);
|
||||
double cellMaxLon = decodeLongitude(cell.maxPacked, Integer.BYTES);
|
||||
|
||||
if (cellMaxLat < visitor.minLat || visitor.maxLat < cellMinLat || ((cellMaxLon < visitor.minLon || visitor.maxLon < cellMinLon) && cellMaxLon < visitor.minLon2)) {
|
||||
// this cell is outside our search bbox; don't bother exploring any more
|
||||
continue;
|
||||
}
|
||||
|
||||
byte[] splitPackedValue = cell.maxPacked.clone();
|
||||
reader.copySplitValue(cell.nodeID, splitPackedValue);
|
||||
cellQueue.offer(new Cell(cell.readerIndex, 2*cell.nodeID, cell.minPacked, splitPackedValue,
|
||||
approxBestDistance(cell.minPacked, splitPackedValue, pointLat, pointLon)));
|
||||
|
||||
splitPackedValue = cell.minPacked.clone();
|
||||
reader.copySplitValue(cell.nodeID, splitPackedValue);
|
||||
cellQueue.offer(new Cell(cell.readerIndex, 2*cell.nodeID+1, splitPackedValue, cell.maxPacked,
|
||||
approxBestDistance(splitPackedValue, cell.maxPacked, pointLat, pointLon)));
|
||||
}
|
||||
}
|
||||
|
||||
NearestHit[] hits = new NearestHit[hitQueue.size()];
|
||||
int downTo = hitQueue.size()-1;
|
||||
while (hitQueue.size() != 0) {
|
||||
hits[downTo] = hitQueue.poll();
|
||||
downTo--;
|
||||
}
|
||||
|
||||
return hits;
|
||||
}
|
||||
|
||||
// NOTE: incoming args never cross the dateline, since they are a BKD cell
|
||||
private static double approxBestDistance(byte[] minPackedValue, byte[] maxPackedValue, double pointLat, double pointLon) {
|
||||
double minLat = decodeLatitude(minPackedValue, 0);
|
||||
double minLon = decodeLongitude(minPackedValue, Integer.BYTES);
|
||||
double maxLat = decodeLatitude(maxPackedValue, 0);
|
||||
double maxLon = decodeLongitude(maxPackedValue, Integer.BYTES);
|
||||
return approxBestDistance(minLat, maxLat, minLon, maxLon, pointLat, pointLon);
|
||||
}
|
||||
|
||||
// NOTE: incoming args never cross the dateline, since they are a BKD cell
|
||||
private static double approxBestDistance(double minLat, double maxLat, double minLon, double maxLon, double pointLat, double pointLon) {
|
||||
|
||||
// TODO: can we make this the trueBestDistance? I.e., minimum distance between the point and ANY point on the box? we can speed things
|
||||
// up if so, but not enrolling any BKD cell whose true best distance is > bottom of the current hit queue
|
||||
|
||||
if (pointLat >= minLat && pointLat <= maxLat && pointLon >= minLon && pointLon <= minLon) {
|
||||
// point is inside the cell!
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
double d1 = SloppyMath.haversinMeters(pointLat, pointLon, minLat, minLon);
|
||||
double d2 = SloppyMath.haversinMeters(pointLat, pointLon, minLat, maxLon);
|
||||
double d3 = SloppyMath.haversinMeters(pointLat, pointLon, maxLat, maxLon);
|
||||
double d4 = SloppyMath.haversinMeters(pointLat, pointLon, maxLat, minLon);
|
||||
return Math.min(Math.min(d1, d2), Math.min(d3, d4));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,245 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.document;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.NearestNeighbor.NearestHit;
|
||||
import org.apache.lucene.geo.GeoEncodingUtils;
|
||||
import org.apache.lucene.geo.GeoTestUtil;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SerialMergeScheduler;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TopFieldDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.SloppyMath;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestNearest extends LuceneTestCase {
|
||||
|
||||
public void testNearestNeighborWithDeletedDocs() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, getIndexWriterConfig());
|
||||
Document doc = new Document();
|
||||
doc.add(new LatLonPoint("point", 40.0, 50.0));
|
||||
doc.add(new StringField("id", "0", Field.Store.YES));
|
||||
w.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new LatLonPoint("point", 45.0, 55.0));
|
||||
doc.add(new StringField("id", "1", Field.Store.YES));
|
||||
w.addDocument(doc);
|
||||
|
||||
DirectoryReader r = w.getReader();
|
||||
IndexSearcher s = newSearcher(r);
|
||||
FieldDoc hit = (FieldDoc) LatLonPoint.nearest(s, "point", 40.0, 50.0, 1).scoreDocs[0];
|
||||
assertEquals("0", r.document(hit.doc).getField("id").stringValue());
|
||||
r.close();
|
||||
|
||||
w.deleteDocuments(new Term("id", "0"));
|
||||
r = w.getReader();
|
||||
s = newSearcher(r);
|
||||
hit = (FieldDoc) LatLonPoint.nearest(s, "point", 40.0, 50.0, 1).scoreDocs[0];
|
||||
assertEquals("1", r.document(hit.doc).getField("id").stringValue());
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNearestNeighborWithAllDeletedDocs() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, getIndexWriterConfig());
|
||||
Document doc = new Document();
|
||||
doc.add(new LatLonPoint("point", 40.0, 50.0));
|
||||
doc.add(new StringField("id", "0", Field.Store.YES));
|
||||
w.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(new LatLonPoint("point", 45.0, 55.0));
|
||||
doc.add(new StringField("id", "1", Field.Store.YES));
|
||||
w.addDocument(doc);
|
||||
|
||||
DirectoryReader r = w.getReader();
|
||||
IndexSearcher s = newSearcher(r);
|
||||
FieldDoc hit = (FieldDoc) LatLonPoint.nearest(s, "point", 40.0, 50.0, 1).scoreDocs[0];
|
||||
assertEquals("0", r.document(hit.doc).getField("id").stringValue());
|
||||
r.close();
|
||||
|
||||
w.deleteDocuments(new Term("id", "0"));
|
||||
w.deleteDocuments(new Term("id", "1"));
|
||||
r = w.getReader();
|
||||
s = newSearcher(r);
|
||||
assertEquals(0, LatLonPoint.nearest(s, "point", 40.0, 50.0, 1).scoreDocs.length);
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testTieBreakByDocID() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, getIndexWriterConfig());
|
||||
Document doc = new Document();
|
||||
doc.add(new LatLonPoint("point", 40.0, 50.0));
|
||||
doc.add(new StringField("id", "0", Field.Store.YES));
|
||||
w.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(new LatLonPoint("point", 40.0, 50.0));
|
||||
doc.add(new StringField("id", "1", Field.Store.YES));
|
||||
w.addDocument(doc);
|
||||
|
||||
DirectoryReader r = DirectoryReader.open(w);
|
||||
ScoreDoc[] hits = LatLonPoint.nearest(newSearcher(r), "point", 45.0, 50.0, 2).scoreDocs;
|
||||
assertEquals("0", r.document(hits[0].doc).getField("id").stringValue());
|
||||
assertEquals("1", r.document(hits[1].doc).getField("id").stringValue());
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testNearestNeighborWithNoDocs() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, getIndexWriterConfig());
|
||||
DirectoryReader r = w.getReader();
|
||||
assertEquals(0, LatLonPoint.nearest(newSearcher(r), "point", 40.0, 50.0, 1).scoreDocs.length);
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private double quantizeLat(double latRaw) {
|
||||
return GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(latRaw));
|
||||
}
|
||||
|
||||
private double quantizeLon(double lonRaw) {
|
||||
return GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(lonRaw));
|
||||
}
|
||||
|
||||
public void testNearestNeighborRandom() throws Exception {
|
||||
|
||||
int numPoints = atLeast(5000);
|
||||
Directory dir;
|
||||
if (numPoints > 100000) {
|
||||
dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
|
||||
} else {
|
||||
dir = newDirectory();
|
||||
}
|
||||
double[] lats = new double[numPoints];
|
||||
double[] lons = new double[numPoints];
|
||||
|
||||
IndexWriterConfig iwc = getIndexWriterConfig();
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
iwc.setMergeScheduler(new SerialMergeScheduler());
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
for(int id=0;id<numPoints;id++) {
|
||||
lats[id] = quantizeLat(GeoTestUtil.nextLatitude());
|
||||
lons[id] = quantizeLon(GeoTestUtil.nextLongitude());
|
||||
Document doc = new Document();
|
||||
doc.add(new LatLonPoint("point", lats[id], lons[id]));
|
||||
doc.add(new StoredField("id", id));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
w.forceMerge(1);
|
||||
}
|
||||
|
||||
DirectoryReader r = w.getReader();
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: reader=" + r);
|
||||
}
|
||||
IndexSearcher s = newSearcher(r);
|
||||
int iters = atLeast(100);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: iter=" + iter);
|
||||
}
|
||||
double pointLat = GeoTestUtil.nextLatitude();
|
||||
double pointLon = GeoTestUtil.nextLongitude();
|
||||
|
||||
// dumb brute force search to get the expected result:
|
||||
NearestHit[] expectedHits = new NearestHit[lats.length];
|
||||
for(int id=0;id<lats.length;id++) {
|
||||
NearestHit hit = new NearestHit();
|
||||
hit.distanceMeters = SloppyMath.haversinMeters(pointLat, pointLon, lats[id], lons[id]);
|
||||
hit.docID = id;
|
||||
expectedHits[id] = hit;
|
||||
}
|
||||
|
||||
Arrays.sort(expectedHits, new Comparator<NearestHit>() {
|
||||
@Override
|
||||
public int compare(NearestHit a, NearestHit b) {
|
||||
int cmp = Double.compare(a.distanceMeters, b.distanceMeters);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
// tie break by smaller docID:
|
||||
return a.docID - b.docID;
|
||||
}
|
||||
});
|
||||
|
||||
int topN = TestUtil.nextInt(random(), 1, lats.length);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nhits for pointLat=" + pointLat + " pointLon=" + pointLon);
|
||||
}
|
||||
|
||||
// Also test with MatchAllDocsQuery, sorting by distance:
|
||||
TopFieldDocs fieldDocs = s.search(new MatchAllDocsQuery(), topN, new Sort(LatLonPoint.newDistanceSort("point", pointLat, pointLon)));
|
||||
|
||||
ScoreDoc[] hits = LatLonPoint.nearest(s, "point", pointLat, pointLon, topN).scoreDocs;
|
||||
for(int i=0;i<topN;i++) {
|
||||
NearestHit expected = expectedHits[i];
|
||||
FieldDoc expected2 = (FieldDoc) fieldDocs.scoreDocs[i];
|
||||
FieldDoc actual = (FieldDoc) hits[i];
|
||||
Document actualDoc = r.document(actual.doc);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("hit " + i);
|
||||
System.out.println(" expected id=" + expected.docID + " lat=" + lats[expected.docID] + " lon=" + lons[expected.docID] + " distance=" + expected.distanceMeters + " meters");
|
||||
System.out.println(" actual id=" + actualDoc.getField("id") + " distance=" + actual.fields[0] + " meters");
|
||||
}
|
||||
|
||||
assertEquals(expected.docID, actual.doc);
|
||||
assertEquals(expected.distanceMeters, ((Double) actual.fields[0]).doubleValue(), 0.0);
|
||||
|
||||
assertEquals(expected.docID, expected.docID);
|
||||
assertEquals(((Double) expected2.fields[0]).doubleValue(), expected.distanceMeters, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private IndexWriterConfig getIndexWriterConfig() {
|
||||
IndexWriterConfig iwc = newIndexWriterConfig();
|
||||
iwc.setCodec(Codec.forName("Lucene60"));
|
||||
return iwc;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue