diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 848e0220839..dc4cfcba4d5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -10,8 +10,8 @@ http://s.apache.org/luceneversions New Features -* LUCENE-7099: Add LatLonPoint.newDistanceSort to the sandbox's - LatLonPoint. (Robert Muir) +* LUCENE-7099: Add LatLonDocValuesField.newDistanceSort to the sandbox. + (Robert Muir) * LUCENE-7140: Add PlanetModel.bisection to spatial3d (Karl Wright via Mike McCandless) diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesField.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesField.java new file mode 100644 index 00000000000..20154d25505 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesField.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.document; + +import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude; +import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude; +import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude; +import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.SortField; + +/** + * An per-document location field. + *
+ * Sorting by distance is efficient. Multiple values for the same field in one document + * is allowed. + *
+ * This field defines static factory methods for common operations: + *
+ * If you also need query operations, you should add a separate {@link LatLonPoint} instance. + * If you also need to store the value, you should add a separate {@link StoredField} instance. + *
+ * WARNING: Values are indexed with some loss of precision from the + * original {@code double} values (4.190951585769653E-8 for the latitude component + * and 8.381903171539307E-8 for longitude). + * @see LatLonPoint + */ +public class LatLonDocValuesField extends Field { + + /** + * Type for a LatLonDocValuesField + *
+ * Each value stores a 64-bit long where the upper 32 bits are the encoded latitude, + * and the lower 32 bits are the encoded longitude. + * @see org.apache.lucene.geo.GeoEncodingUtils#decodeLatitude(int) + * @see org.apache.lucene.geo.GeoEncodingUtils#decodeLongitude(int) + */ + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC); + TYPE.freeze(); + } + + /** + * Creates a new LatLonDocValuesField with the specified latitude and longitude + * @param name field name + * @param latitude latitude value: must be within standard +/-90 coordinate bounds. + * @param longitude longitude value: must be within standard +/-180 coordinate bounds. + * @throws IllegalArgumentException if the field name is null or latitude or longitude are out of bounds + */ + public LatLonDocValuesField(String name, double latitude, double longitude) { + super(name, TYPE); + setLocationValue(latitude, longitude); + } + + /** + * Change the values of this field + * @param latitude latitude value: must be within standard +/-90 coordinate bounds. + * @param longitude longitude value: must be within standard +/-180 coordinate bounds. + * @throws IllegalArgumentException if latitude or longitude are out of bounds + */ + public void setLocationValue(double latitude, double longitude) { + int latitudeEncoded = encodeLatitude(latitude); + int longitudeEncoded = encodeLongitude(longitude); + fieldsData = Long.valueOf((((long)latitudeEncoded) << 32) | (longitudeEncoded & 0xFFFFFFFFL)); + } + + /** helper: checks a fieldinfo and throws exception if its definitely not a LatLonDocValuesField */ + static void checkCompatible(FieldInfo fieldInfo) { + // dv properties could be "unset", if you e.g. used only StoredField with this same name in the segment. + if (fieldInfo.getDocValuesType() != DocValuesType.NONE && fieldInfo.getDocValuesType() != TYPE.docValuesType()) { + throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\" was indexed with docValuesType=" + fieldInfo.getDocValuesType() + + " but this type has docValuesType=" + TYPE.docValuesType() + + ", is the field really a LatLonDocValuesField?"); + } + } + + @Override + public String toString() { + StringBuilder result = new StringBuilder(); + result.append(getClass().getSimpleName()); + result.append(" <"); + result.append(name); + result.append(':'); + + long currentValue = Long.valueOf((Long)fieldsData); + result.append(decodeLatitude((int)(currentValue >> 32))); + result.append(','); + result.append(decodeLongitude((int)(currentValue & 0xFFFFFFFF))); + + result.append('>'); + return result.toString(); + } + + /** + * Creates a SortField for sorting by distance from a location. + *
+ * This sort orders documents by ascending distance from the location. The value returned in {@link FieldDoc} for + * the hits contains a Double instance with the distance in meters. + *
+ * If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance + * (missing values sort last). + *
+ * If a document contains multiple values for the field, the closest distance to the location is used. + * + * @param field field name. must not be null. + * @param latitude latitude at the center: must be within standard +/-90 coordinate bounds. + * @param longitude longitude at the center: must be within standard +/-180 coordinate bounds. + * @return SortField ordering documents by distance + * @throws IllegalArgumentException if {@code field} is null or location has invalid coordinates. + */ + public static SortField newDistanceSort(String field, double latitude, double longitude) { + return new LatLonPointSortField(field, latitude, longitude); + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java index 0f6afe9116f..426a7022a6a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java @@ -24,7 +24,6 @@ import org.apache.lucene.codecs.lucene60.Lucene60PointsFormat; import org.apache.lucene.codecs.lucene60.Lucene60PointsReader; import org.apache.lucene.geo.GeoUtils; import org.apache.lucene.geo.Polygon; -import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues; @@ -38,7 +37,6 @@ import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -63,21 +61,23 @@ import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitudeCeil; *
+ * If you also need per-document operations such as sort by distance, add a separate {@link LatLonDocValuesField} instance. + * If you also need to store the value, you should add a separate {@link StoredField} instance. + *
* WARNING: Values are indexed with some loss of precision from the * original {@code double} values (4.190951585769653E-8 for the latitude component * and 8.381903171539307E-8 for longitude). * @see PointValues + * @see LatLonDocValuesField */ // TODO ^^^ that is very sandy and hurts the API, usage, and tests tremendously, because what the user passes // to the field is not actually what gets indexed. Float would be 1E-5 error vs 1E-7, but it might be // a better tradeoff? then it would be completely transparent to the user and lucene would be "lossless". public class LatLonPoint extends Field { - private long currentValue; /** * Type for an indexed LatLonPoint @@ -87,7 +87,6 @@ public class LatLonPoint extends Field { public static final FieldType TYPE = new FieldType(); static { TYPE.setDimensions(2, Integer.BYTES); - TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC); TYPE.freeze(); } @@ -98,13 +97,19 @@ public class LatLonPoint extends Field { * @throws IllegalArgumentException if latitude or longitude are out of bounds */ public void setLocationValue(double latitude, double longitude) { - byte[] bytes = new byte[8]; + final byte[] bytes; + + if (fieldsData == null) { + bytes = new byte[8]; + fieldsData = new BytesRef(bytes); + } else { + bytes = ((BytesRef) fieldsData).bytes; + } + int latitudeEncoded = encodeLatitude(latitude); int longitudeEncoded = encodeLongitude(longitude); NumericUtils.intToSortableBytes(latitudeEncoded, bytes, 0); NumericUtils.intToSortableBytes(longitudeEncoded, bytes, Integer.BYTES); - fieldsData = new BytesRef(bytes); - currentValue = (((long)latitudeEncoded) << 32) | (longitudeEncoded & 0xFFFFFFFFL); } /** @@ -127,24 +132,14 @@ public class LatLonPoint extends Field { result.append(name); result.append(':'); - result.append(decodeLatitude((int)(currentValue >> 32))); + byte bytes[] = ((BytesRef) fieldsData).bytes; + result.append(decodeLatitude(bytes, 0)); result.append(','); - result.append(decodeLongitude((int)(currentValue & 0xFFFFFFFF))); + result.append(decodeLongitude(bytes, Integer.BYTES)); result.append('>'); return result.toString(); } - - /** - * Returns a 64-bit long, where the upper 32 bits are the encoded latitude, - * and the lower 32 bits are the encoded longitude. - * @see org.apache.lucene.geo.GeoEncodingUtils#decodeLatitude(int) - * @see org.apache.lucene.geo.GeoEncodingUtils#decodeLongitude(int) - */ - @Override - public Number numericValue() { - return currentValue; - } /** sugar encodes a single point as a byte array */ private static byte[] encode(double latitude, double longitude) { @@ -175,11 +170,6 @@ public class LatLonPoint extends Field { " but this point type has bytesPerDim=" + TYPE.pointNumBytes() + ", is the field really a LatLonPoint?"); } - if (fieldInfo.getDocValuesType() != DocValuesType.NONE && fieldInfo.getDocValuesType() != TYPE.docValuesType()) { - throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\" was indexed with docValuesType=" + fieldInfo.getDocValuesType() + - " but this point type has docValuesType=" + TYPE.docValuesType() + - ", is the field really a LatLonPoint?"); - } } // static methods for generating queries @@ -278,31 +268,10 @@ public class LatLonPoint extends Field { return new LatLonPointInPolygonQuery(field, polygons); } - /** - * Creates a SortField for sorting by distance from a location. - *
- * This sort orders documents by ascending distance from the location. The value returned in {@link FieldDoc} for - * the hits contains a Double instance with the distance in meters. - *
- * If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance - * (missing values sort last). - *
- * If a document contains multiple values for the field, the closest distance to the location is used. - * - * @param field field name. must not be null. - * @param latitude latitude at the center: must be within standard +/-90 coordinate bounds. - * @param longitude longitude at the center: must be within standard +/-180 coordinate bounds. - * @return SortField ordering documents by distance - * @throws IllegalArgumentException if {@code field} is null or location has invalid coordinates. - */ - public static SortField newDistanceSort(String field, double latitude, double longitude) { - return new LatLonPointSortField(field, latitude, longitude); - } - /** * Finds the {@code n} nearest indexed points to the provided point, according to Haversine distance. *
- * This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link #newDistanceSort},
+ * This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link LatLonDocValuesField#newDistanceSort},
* but is far more efficient since it takes advantage of properties the indexed BKD tree. Currently this
* only works with {@link Lucene60PointsFormat} (used by the default codec). Multi-valued fields are
* currently not de-duplicated, so if a document had multiple instances of the specified field that
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java
index 0306489da3c..0b1d0c7d848 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java
@@ -158,7 +158,7 @@ class LatLonPointDistanceComparator extends FieldComparator