LUCENE-7240: Remove DocValues from LatLonPoint, add DocValuesField for that

This commit is contained in:
Robert Muir 2016-04-24 17:15:30 -04:00
parent 7acf8babae
commit 45c48da54a
8 changed files with 197 additions and 65 deletions

View File

@ -10,8 +10,8 @@ http://s.apache.org/luceneversions
New Features
* LUCENE-7099: Add LatLonPoint.newDistanceSort to the sandbox's
LatLonPoint. (Robert Muir)
* LUCENE-7099: Add LatLonDocValuesField.newDistanceSort to the sandbox.
(Robert Muir)
* LUCENE-7140: Add PlanetModel.bisection to spatial3d (Karl Wright via
Mike McCandless)

View File

@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude;
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.SortField;
/**
* An per-document location field.
* <p>
* Sorting by distance is efficient. Multiple values for the same field in one document
* is allowed.
* <p>
* This field defines static factory methods for common operations:
* <ul>
* <li>{@link #newDistanceSort newDistanceSort()} for ordering documents by distance from a specified location.
* </ul>
* <p>
* If you also need query operations, you should add a separate {@link LatLonPoint} instance.
* If you also need to store the value, you should add a separate {@link StoredField} instance.
* <p>
* <b>WARNING</b>: Values are indexed with some loss of precision from the
* original {@code double} values (4.190951585769653E-8 for the latitude component
* and 8.381903171539307E-8 for longitude).
* @see LatLonPoint
*/
public class LatLonDocValuesField extends Field {
/**
* Type for a LatLonDocValuesField
* <p>
* Each value stores a 64-bit long where the upper 32 bits are the encoded latitude,
* and the lower 32 bits are the encoded longitude.
* @see org.apache.lucene.geo.GeoEncodingUtils#decodeLatitude(int)
* @see org.apache.lucene.geo.GeoEncodingUtils#decodeLongitude(int)
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC);
TYPE.freeze();
}
/**
* Creates a new LatLonDocValuesField with the specified latitude and longitude
* @param name field name
* @param latitude latitude value: must be within standard +/-90 coordinate bounds.
* @param longitude longitude value: must be within standard +/-180 coordinate bounds.
* @throws IllegalArgumentException if the field name is null or latitude or longitude are out of bounds
*/
public LatLonDocValuesField(String name, double latitude, double longitude) {
super(name, TYPE);
setLocationValue(latitude, longitude);
}
/**
* Change the values of this field
* @param latitude latitude value: must be within standard +/-90 coordinate bounds.
* @param longitude longitude value: must be within standard +/-180 coordinate bounds.
* @throws IllegalArgumentException if latitude or longitude are out of bounds
*/
public void setLocationValue(double latitude, double longitude) {
int latitudeEncoded = encodeLatitude(latitude);
int longitudeEncoded = encodeLongitude(longitude);
fieldsData = Long.valueOf((((long)latitudeEncoded) << 32) | (longitudeEncoded & 0xFFFFFFFFL));
}
/** helper: checks a fieldinfo and throws exception if its definitely not a LatLonDocValuesField */
static void checkCompatible(FieldInfo fieldInfo) {
// dv properties could be "unset", if you e.g. used only StoredField with this same name in the segment.
if (fieldInfo.getDocValuesType() != DocValuesType.NONE && fieldInfo.getDocValuesType() != TYPE.docValuesType()) {
throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\" was indexed with docValuesType=" + fieldInfo.getDocValuesType() +
" but this type has docValuesType=" + TYPE.docValuesType() +
", is the field really a LatLonDocValuesField?");
}
}
@Override
public String toString() {
StringBuilder result = new StringBuilder();
result.append(getClass().getSimpleName());
result.append(" <");
result.append(name);
result.append(':');
long currentValue = Long.valueOf((Long)fieldsData);
result.append(decodeLatitude((int)(currentValue >> 32)));
result.append(',');
result.append(decodeLongitude((int)(currentValue & 0xFFFFFFFF)));
result.append('>');
return result.toString();
}
/**
* Creates a SortField for sorting by distance from a location.
* <p>
* This sort orders documents by ascending distance from the location. The value returned in {@link FieldDoc} for
* the hits contains a Double instance with the distance in meters.
* <p>
* If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance
* (missing values sort last).
* <p>
* If a document contains multiple values for the field, the <i>closest</i> distance to the location is used.
*
* @param field field name. must not be null.
* @param latitude latitude at the center: must be within standard +/-90 coordinate bounds.
* @param longitude longitude at the center: must be within standard +/-180 coordinate bounds.
* @return SortField ordering documents by distance
* @throws IllegalArgumentException if {@code field} is null or location has invalid coordinates.
*/
public static SortField newDistanceSort(String field, double latitude, double longitude) {
return new LatLonPointSortField(field, latitude, longitude);
}
}

View File

@ -24,7 +24,6 @@ import org.apache.lucene.codecs.lucene60.Lucene60PointsFormat;
import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.geo.Polygon;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
@ -38,7 +37,6 @@ import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -63,21 +61,23 @@ import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitudeCeil;
* <ul>
* <li>{@link #newBoxQuery newBoxQuery()} for matching points within a bounding box.
* <li>{@link #newDistanceQuery newDistanceQuery()} for matching points within a specified distance.
* <li>{@link #newDistanceSort newDistanceSort()} for ordering documents by distance from a specified location.
* <li>{@link #newPolygonQuery newPolygonQuery()} for matching points within an arbitrary polygon.
* <li>{@link #nearest nearest()} for finding the k-nearest neighbors by distance.
* </ul>
* <p>
* If you also need per-document operations such as sort by distance, add a separate {@link LatLonDocValuesField} instance.
* If you also need to store the value, you should add a separate {@link StoredField} instance.
* <p>
* <b>WARNING</b>: Values are indexed with some loss of precision from the
* original {@code double} values (4.190951585769653E-8 for the latitude component
* and 8.381903171539307E-8 for longitude).
* @see PointValues
* @see LatLonDocValuesField
*/
// TODO ^^^ that is very sandy and hurts the API, usage, and tests tremendously, because what the user passes
// to the field is not actually what gets indexed. Float would be 1E-5 error vs 1E-7, but it might be
// a better tradeoff? then it would be completely transparent to the user and lucene would be "lossless".
public class LatLonPoint extends Field {
private long currentValue;
/**
* Type for an indexed LatLonPoint
@ -87,7 +87,6 @@ public class LatLonPoint extends Field {
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDimensions(2, Integer.BYTES);
TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC);
TYPE.freeze();
}
@ -98,13 +97,19 @@ public class LatLonPoint extends Field {
* @throws IllegalArgumentException if latitude or longitude are out of bounds
*/
public void setLocationValue(double latitude, double longitude) {
byte[] bytes = new byte[8];
final byte[] bytes;
if (fieldsData == null) {
bytes = new byte[8];
fieldsData = new BytesRef(bytes);
} else {
bytes = ((BytesRef) fieldsData).bytes;
}
int latitudeEncoded = encodeLatitude(latitude);
int longitudeEncoded = encodeLongitude(longitude);
NumericUtils.intToSortableBytes(latitudeEncoded, bytes, 0);
NumericUtils.intToSortableBytes(longitudeEncoded, bytes, Integer.BYTES);
fieldsData = new BytesRef(bytes);
currentValue = (((long)latitudeEncoded) << 32) | (longitudeEncoded & 0xFFFFFFFFL);
}
/**
@ -127,24 +132,14 @@ public class LatLonPoint extends Field {
result.append(name);
result.append(':');
result.append(decodeLatitude((int)(currentValue >> 32)));
byte bytes[] = ((BytesRef) fieldsData).bytes;
result.append(decodeLatitude(bytes, 0));
result.append(',');
result.append(decodeLongitude((int)(currentValue & 0xFFFFFFFF)));
result.append(decodeLongitude(bytes, Integer.BYTES));
result.append('>');
return result.toString();
}
/**
* Returns a 64-bit long, where the upper 32 bits are the encoded latitude,
* and the lower 32 bits are the encoded longitude.
* @see org.apache.lucene.geo.GeoEncodingUtils#decodeLatitude(int)
* @see org.apache.lucene.geo.GeoEncodingUtils#decodeLongitude(int)
*/
@Override
public Number numericValue() {
return currentValue;
}
/** sugar encodes a single point as a byte array */
private static byte[] encode(double latitude, double longitude) {
@ -175,11 +170,6 @@ public class LatLonPoint extends Field {
" but this point type has bytesPerDim=" + TYPE.pointNumBytes() +
", is the field really a LatLonPoint?");
}
if (fieldInfo.getDocValuesType() != DocValuesType.NONE && fieldInfo.getDocValuesType() != TYPE.docValuesType()) {
throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\" was indexed with docValuesType=" + fieldInfo.getDocValuesType() +
" but this point type has docValuesType=" + TYPE.docValuesType() +
", is the field really a LatLonPoint?");
}
}
// static methods for generating queries
@ -278,31 +268,10 @@ public class LatLonPoint extends Field {
return new LatLonPointInPolygonQuery(field, polygons);
}
/**
* Creates a SortField for sorting by distance from a location.
* <p>
* This sort orders documents by ascending distance from the location. The value returned in {@link FieldDoc} for
* the hits contains a Double instance with the distance in meters.
* <p>
* If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance
* (missing values sort last).
* <p>
* If a document contains multiple values for the field, the <i>closest</i> distance to the location is used.
*
* @param field field name. must not be null.
* @param latitude latitude at the center: must be within standard +/-90 coordinate bounds.
* @param longitude longitude at the center: must be within standard +/-180 coordinate bounds.
* @return SortField ordering documents by distance
* @throws IllegalArgumentException if {@code field} is null or location has invalid coordinates.
*/
public static SortField newDistanceSort(String field, double latitude, double longitude) {
return new LatLonPointSortField(field, latitude, longitude);
}
/**
* Finds the {@code n} nearest indexed points to the provided point, according to Haversine distance.
* <p>
* This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link #newDistanceSort},
* This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link LatLonDocValuesField#newDistanceSort},
* but is far more efficient since it takes advantage of properties the indexed BKD tree. Currently this
* only works with {@link Lucene60PointsFormat} (used by the default codec). Multi-valued fields are
* currently not de-duplicated, so if a document had multiple instances of the specified field that

View File

@ -158,7 +158,7 @@ class LatLonPointDistanceComparator extends FieldComparator<Double> implements L
LeafReader reader = context.reader();
FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (info != null) {
LatLonPoint.checkCompatible(info);
LatLonDocValuesField.checkCompatible(info);
}
currentDocs = DocValues.getSortedNumeric(reader, field);
return this;

View File

@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import org.apache.lucene.util.LuceneTestCase;
/** Simple tests for LatLonDocValuesField */
public class TestLatLonDocValuesField extends LuceneTestCase {
public void testToString() throws Exception {
// looks crazy due to lossiness
assertEquals("LatLonDocValuesField <field:18.313693958334625,-65.22744401358068>",(new LatLonDocValuesField("field", 18.313694, -65.227444)).toString());
// sort field
assertEquals("<distance:\"field\" latitude=18.0 longitude=19.0>", LatLonDocValuesField.newDistanceSort("field", 18.0, 19.0).toString());
}
}

View File

@ -33,8 +33,5 @@ public class TestLatLonPoint extends LuceneTestCase {
// distance query does not quantize inputs
assertEquals("field:18.0,19.0 +/- 25.0 meters", LatLonPoint.newDistanceQuery("field", 18, 19, 25).toString());
// sort field
assertEquals("<distance:\"field\" latitude=18.0 longitude=19.0>", LatLonPoint.newDistanceSort("field", 18.0, 19.0).toString());
}
}

View File

@ -40,7 +40,7 @@ import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude;
import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude;
/** Simple tests for {@link LatLonPoint#newDistanceSort} */
/** Simple tests for {@link LatLonDocValuesField#newDistanceSort} */
public class TestLatLonPointDistanceSort extends LuceneTestCase {
/** Add three points and sort by distance */
@ -50,22 +50,22 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
// add some docs
Document doc = new Document();
doc.add(new LatLonPoint("location", 40.759011, -73.9844722));
doc.add(new LatLonDocValuesField("location", 40.759011, -73.9844722));
iw.addDocument(doc);
doc = new Document();
doc.add(new LatLonPoint("location", 40.718266, -74.007819));
doc.add(new LatLonDocValuesField("location", 40.718266, -74.007819));
iw.addDocument(doc);
doc = new Document();
doc.add(new LatLonPoint("location", 40.7051157, -74.0088305));
doc.add(new LatLonDocValuesField("location", 40.7051157, -74.0088305));
iw.addDocument(doc);
IndexReader reader = iw.getReader();
IndexSearcher searcher = newSearcher(reader);
iw.close();
Sort sort = new Sort(LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731));
Sort sort = new Sort(LatLonDocValuesField.newDistanceSort("location", 40.7143528, -74.0059731));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
FieldDoc d = (FieldDoc) td.scoreDocs[0];
@ -91,18 +91,18 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
iw.addDocument(doc);
doc = new Document();
doc.add(new LatLonPoint("location", 40.718266, -74.007819));
doc.add(new LatLonDocValuesField("location", 40.718266, -74.007819));
iw.addDocument(doc);
doc = new Document();
doc.add(new LatLonPoint("location", 40.7051157, -74.0088305));
doc.add(new LatLonDocValuesField("location", 40.7051157, -74.0088305));
iw.addDocument(doc);
IndexReader reader = iw.getReader();
IndexSearcher searcher = newSearcher(reader);
iw.close();
Sort sort = new Sort(LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731));
Sort sort = new Sort(LatLonDocValuesField.newDistanceSort("location", 40.7143528, -74.0059731));
TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
FieldDoc d = (FieldDoc) td.scoreDocs[0];
@ -199,7 +199,7 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
double lat = decodeLatitude(encodeLatitude(latRaw));
double lon = decodeLongitude(encodeLongitude(lonRaw));
doc.add(new LatLonPoint("field", lat, lon));
doc.add(new LatLonDocValuesField("field", lat, lon));
doc.add(new StoredField("lat", lat));
doc.add(new StoredField("lon", lon));
} // otherwise "missing"
@ -234,7 +234,7 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase {
// randomize the topN a bit
int topN = TestUtil.nextInt(random(), 1, reader.maxDoc());
// sort by distance, then ID
SortField distanceSort = LatLonPoint.newDistanceSort("field", lat, lon);
SortField distanceSort = LatLonDocValuesField.newDistanceSort("field", lat, lon);
distanceSort.setMissingValue(missingValue);
Sort sort = new Sort(distanceSort,
new SortField("id", SortField.Type.INT));

View File

@ -166,6 +166,7 @@ public class TestNearest extends LuceneTestCase {
lons[id] = quantizeLon(GeoTestUtil.nextLongitude());
Document doc = new Document();
doc.add(new LatLonPoint("point", lats[id], lons[id]));
doc.add(new LatLonDocValuesField("point", lats[id], lons[id]));
doc.add(new StoredField("id", id));
w.addDocument(doc);
}
@ -216,7 +217,7 @@ public class TestNearest extends LuceneTestCase {
}
// Also test with MatchAllDocsQuery, sorting by distance:
TopFieldDocs fieldDocs = s.search(new MatchAllDocsQuery(), topN, new Sort(LatLonPoint.newDistanceSort("point", pointLat, pointLon)));
TopFieldDocs fieldDocs = s.search(new MatchAllDocsQuery(), topN, new Sort(LatLonDocValuesField.newDistanceSort("point", pointLat, pointLon)));
ScoreDoc[] hits = LatLonPoint.nearest(s, "point", pointLat, pointLon, topN).scoreDocs;
for(int i=0;i<topN;i++) {