From 45c48da54ad2bf09fd4c9559ba1c776ad9460d82 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 24 Apr 2016 17:15:30 -0400 Subject: [PATCH] LUCENE-7240: Remove DocValues from LatLonPoint, add DocValuesField for that --- lucene/CHANGES.txt | 4 +- .../lucene/document/LatLonDocValuesField.java | 135 ++++++++++++++++++ .../apache/lucene/document/LatLonPoint.java | 65 +++------ .../LatLonPointDistanceComparator.java | 2 +- .../document/TestLatLonDocValuesField.java | 30 ++++ .../lucene/document/TestLatLonPoint.java | 3 - .../document/TestLatLonPointDistanceSort.java | 20 +-- .../apache/lucene/document/TestNearest.java | 3 +- 8 files changed, 197 insertions(+), 65 deletions(-) create mode 100644 lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesField.java create mode 100644 lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonDocValuesField.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 848e0220839..dc4cfcba4d5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -10,8 +10,8 @@ http://s.apache.org/luceneversions New Features -* LUCENE-7099: Add LatLonPoint.newDistanceSort to the sandbox's - LatLonPoint. (Robert Muir) +* LUCENE-7099: Add LatLonDocValuesField.newDistanceSort to the sandbox. + (Robert Muir) * LUCENE-7140: Add PlanetModel.bisection to spatial3d (Karl Wright via Mike McCandless) diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesField.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesField.java new file mode 100644 index 00000000000..20154d25505 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesField.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.document; + +import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude; +import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude; +import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude; +import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.SortField; + +/** + * An per-document location field. + *

+ * Sorting by distance is efficient. Multiple values for the same field in one document + * is allowed. + *

+ * This field defines static factory methods for common operations: + *

+ *

+ * If you also need query operations, you should add a separate {@link LatLonPoint} instance. + * If you also need to store the value, you should add a separate {@link StoredField} instance. + *

+ * WARNING: Values are indexed with some loss of precision from the + * original {@code double} values (4.190951585769653E-8 for the latitude component + * and 8.381903171539307E-8 for longitude). + * @see LatLonPoint + */ +public class LatLonDocValuesField extends Field { + + /** + * Type for a LatLonDocValuesField + *

+ * Each value stores a 64-bit long where the upper 32 bits are the encoded latitude, + * and the lower 32 bits are the encoded longitude. + * @see org.apache.lucene.geo.GeoEncodingUtils#decodeLatitude(int) + * @see org.apache.lucene.geo.GeoEncodingUtils#decodeLongitude(int) + */ + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC); + TYPE.freeze(); + } + + /** + * Creates a new LatLonDocValuesField with the specified latitude and longitude + * @param name field name + * @param latitude latitude value: must be within standard +/-90 coordinate bounds. + * @param longitude longitude value: must be within standard +/-180 coordinate bounds. + * @throws IllegalArgumentException if the field name is null or latitude or longitude are out of bounds + */ + public LatLonDocValuesField(String name, double latitude, double longitude) { + super(name, TYPE); + setLocationValue(latitude, longitude); + } + + /** + * Change the values of this field + * @param latitude latitude value: must be within standard +/-90 coordinate bounds. + * @param longitude longitude value: must be within standard +/-180 coordinate bounds. + * @throws IllegalArgumentException if latitude or longitude are out of bounds + */ + public void setLocationValue(double latitude, double longitude) { + int latitudeEncoded = encodeLatitude(latitude); + int longitudeEncoded = encodeLongitude(longitude); + fieldsData = Long.valueOf((((long)latitudeEncoded) << 32) | (longitudeEncoded & 0xFFFFFFFFL)); + } + + /** helper: checks a fieldinfo and throws exception if its definitely not a LatLonDocValuesField */ + static void checkCompatible(FieldInfo fieldInfo) { + // dv properties could be "unset", if you e.g. used only StoredField with this same name in the segment. + if (fieldInfo.getDocValuesType() != DocValuesType.NONE && fieldInfo.getDocValuesType() != TYPE.docValuesType()) { + throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\" was indexed with docValuesType=" + fieldInfo.getDocValuesType() + + " but this type has docValuesType=" + TYPE.docValuesType() + + ", is the field really a LatLonDocValuesField?"); + } + } + + @Override + public String toString() { + StringBuilder result = new StringBuilder(); + result.append(getClass().getSimpleName()); + result.append(" <"); + result.append(name); + result.append(':'); + + long currentValue = Long.valueOf((Long)fieldsData); + result.append(decodeLatitude((int)(currentValue >> 32))); + result.append(','); + result.append(decodeLongitude((int)(currentValue & 0xFFFFFFFF))); + + result.append('>'); + return result.toString(); + } + + /** + * Creates a SortField for sorting by distance from a location. + *

+ * This sort orders documents by ascending distance from the location. The value returned in {@link FieldDoc} for + * the hits contains a Double instance with the distance in meters. + *

+ * If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance + * (missing values sort last). + *

+ * If a document contains multiple values for the field, the closest distance to the location is used. + * + * @param field field name. must not be null. + * @param latitude latitude at the center: must be within standard +/-90 coordinate bounds. + * @param longitude longitude at the center: must be within standard +/-180 coordinate bounds. + * @return SortField ordering documents by distance + * @throws IllegalArgumentException if {@code field} is null or location has invalid coordinates. + */ + public static SortField newDistanceSort(String field, double latitude, double longitude) { + return new LatLonPointSortField(field, latitude, longitude); + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java index 0f6afe9116f..426a7022a6a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPoint.java @@ -24,7 +24,6 @@ import org.apache.lucene.codecs.lucene60.Lucene60PointsFormat; import org.apache.lucene.codecs.lucene60.Lucene60PointsReader; import org.apache.lucene.geo.GeoUtils; import org.apache.lucene.geo.Polygon; -import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues; @@ -38,7 +37,6 @@ import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -63,21 +61,23 @@ import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitudeCeil; *

*

+ * If you also need per-document operations such as sort by distance, add a separate {@link LatLonDocValuesField} instance. + * If you also need to store the value, you should add a separate {@link StoredField} instance. + *

* WARNING: Values are indexed with some loss of precision from the * original {@code double} values (4.190951585769653E-8 for the latitude component * and 8.381903171539307E-8 for longitude). * @see PointValues + * @see LatLonDocValuesField */ // TODO ^^^ that is very sandy and hurts the API, usage, and tests tremendously, because what the user passes // to the field is not actually what gets indexed. Float would be 1E-5 error vs 1E-7, but it might be // a better tradeoff? then it would be completely transparent to the user and lucene would be "lossless". public class LatLonPoint extends Field { - private long currentValue; /** * Type for an indexed LatLonPoint @@ -87,7 +87,6 @@ public class LatLonPoint extends Field { public static final FieldType TYPE = new FieldType(); static { TYPE.setDimensions(2, Integer.BYTES); - TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC); TYPE.freeze(); } @@ -98,13 +97,19 @@ public class LatLonPoint extends Field { * @throws IllegalArgumentException if latitude or longitude are out of bounds */ public void setLocationValue(double latitude, double longitude) { - byte[] bytes = new byte[8]; + final byte[] bytes; + + if (fieldsData == null) { + bytes = new byte[8]; + fieldsData = new BytesRef(bytes); + } else { + bytes = ((BytesRef) fieldsData).bytes; + } + int latitudeEncoded = encodeLatitude(latitude); int longitudeEncoded = encodeLongitude(longitude); NumericUtils.intToSortableBytes(latitudeEncoded, bytes, 0); NumericUtils.intToSortableBytes(longitudeEncoded, bytes, Integer.BYTES); - fieldsData = new BytesRef(bytes); - currentValue = (((long)latitudeEncoded) << 32) | (longitudeEncoded & 0xFFFFFFFFL); } /** @@ -127,24 +132,14 @@ public class LatLonPoint extends Field { result.append(name); result.append(':'); - result.append(decodeLatitude((int)(currentValue >> 32))); + byte bytes[] = ((BytesRef) fieldsData).bytes; + result.append(decodeLatitude(bytes, 0)); result.append(','); - result.append(decodeLongitude((int)(currentValue & 0xFFFFFFFF))); + result.append(decodeLongitude(bytes, Integer.BYTES)); result.append('>'); return result.toString(); } - - /** - * Returns a 64-bit long, where the upper 32 bits are the encoded latitude, - * and the lower 32 bits are the encoded longitude. - * @see org.apache.lucene.geo.GeoEncodingUtils#decodeLatitude(int) - * @see org.apache.lucene.geo.GeoEncodingUtils#decodeLongitude(int) - */ - @Override - public Number numericValue() { - return currentValue; - } /** sugar encodes a single point as a byte array */ private static byte[] encode(double latitude, double longitude) { @@ -175,11 +170,6 @@ public class LatLonPoint extends Field { " but this point type has bytesPerDim=" + TYPE.pointNumBytes() + ", is the field really a LatLonPoint?"); } - if (fieldInfo.getDocValuesType() != DocValuesType.NONE && fieldInfo.getDocValuesType() != TYPE.docValuesType()) { - throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\" was indexed with docValuesType=" + fieldInfo.getDocValuesType() + - " but this point type has docValuesType=" + TYPE.docValuesType() + - ", is the field really a LatLonPoint?"); - } } // static methods for generating queries @@ -278,31 +268,10 @@ public class LatLonPoint extends Field { return new LatLonPointInPolygonQuery(field, polygons); } - /** - * Creates a SortField for sorting by distance from a location. - *

- * This sort orders documents by ascending distance from the location. The value returned in {@link FieldDoc} for - * the hits contains a Double instance with the distance in meters. - *

- * If a document is missing the field, then by default it is treated as having {@link Double#POSITIVE_INFINITY} distance - * (missing values sort last). - *

- * If a document contains multiple values for the field, the closest distance to the location is used. - * - * @param field field name. must not be null. - * @param latitude latitude at the center: must be within standard +/-90 coordinate bounds. - * @param longitude longitude at the center: must be within standard +/-180 coordinate bounds. - * @return SortField ordering documents by distance - * @throws IllegalArgumentException if {@code field} is null or location has invalid coordinates. - */ - public static SortField newDistanceSort(String field, double latitude, double longitude) { - return new LatLonPointSortField(field, latitude, longitude); - } - /** * Finds the {@code n} nearest indexed points to the provided point, according to Haversine distance. *

- * This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link #newDistanceSort}, + * This is functionally equivalent to running {@link MatchAllDocsQuery} with a {@link LatLonDocValuesField#newDistanceSort}, * but is far more efficient since it takes advantage of properties the indexed BKD tree. Currently this * only works with {@link Lucene60PointsFormat} (used by the default codec). Multi-valued fields are * currently not de-duplicated, so if a document had multiple instances of the specified field that diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java index 0306489da3c..0b1d0c7d848 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceComparator.java @@ -158,7 +158,7 @@ class LatLonPointDistanceComparator extends FieldComparator implements L LeafReader reader = context.reader(); FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info != null) { - LatLonPoint.checkCompatible(info); + LatLonDocValuesField.checkCompatible(info); } currentDocs = DocValues.getSortedNumeric(reader, field); return this; diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonDocValuesField.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonDocValuesField.java new file mode 100644 index 00000000000..df934d13406 --- /dev/null +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonDocValuesField.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.document; + +import org.apache.lucene.util.LuceneTestCase; + +/** Simple tests for LatLonDocValuesField */ +public class TestLatLonDocValuesField extends LuceneTestCase { + public void testToString() throws Exception { + // looks crazy due to lossiness + assertEquals("LatLonDocValuesField ",(new LatLonDocValuesField("field", 18.313694, -65.227444)).toString()); + + // sort field + assertEquals("", LatLonDocValuesField.newDistanceSort("field", 18.0, 19.0).toString()); + } +} diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java index acc9186b14f..700eb56e459 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPoint.java @@ -33,8 +33,5 @@ public class TestLatLonPoint extends LuceneTestCase { // distance query does not quantize inputs assertEquals("field:18.0,19.0 +/- 25.0 meters", LatLonPoint.newDistanceQuery("field", 18, 19, 25).toString()); - - // sort field - assertEquals("", LatLonPoint.newDistanceSort("field", 18.0, 19.0).toString()); } } diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java index e40f33d864b..6d825d2b76c 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestLatLonPointDistanceSort.java @@ -40,7 +40,7 @@ import static org.apache.lucene.geo.GeoEncodingUtils.decodeLongitude; import static org.apache.lucene.geo.GeoEncodingUtils.encodeLatitude; import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude; -/** Simple tests for {@link LatLonPoint#newDistanceSort} */ +/** Simple tests for {@link LatLonDocValuesField#newDistanceSort} */ public class TestLatLonPointDistanceSort extends LuceneTestCase { /** Add three points and sort by distance */ @@ -50,22 +50,22 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { // add some docs Document doc = new Document(); - doc.add(new LatLonPoint("location", 40.759011, -73.9844722)); + doc.add(new LatLonDocValuesField("location", 40.759011, -73.9844722)); iw.addDocument(doc); doc = new Document(); - doc.add(new LatLonPoint("location", 40.718266, -74.007819)); + doc.add(new LatLonDocValuesField("location", 40.718266, -74.007819)); iw.addDocument(doc); doc = new Document(); - doc.add(new LatLonPoint("location", 40.7051157, -74.0088305)); + doc.add(new LatLonDocValuesField("location", 40.7051157, -74.0088305)); iw.addDocument(doc); IndexReader reader = iw.getReader(); IndexSearcher searcher = newSearcher(reader); iw.close(); - Sort sort = new Sort(LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731)); + Sort sort = new Sort(LatLonDocValuesField.newDistanceSort("location", 40.7143528, -74.0059731)); TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort); FieldDoc d = (FieldDoc) td.scoreDocs[0]; @@ -91,18 +91,18 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { iw.addDocument(doc); doc = new Document(); - doc.add(new LatLonPoint("location", 40.718266, -74.007819)); + doc.add(new LatLonDocValuesField("location", 40.718266, -74.007819)); iw.addDocument(doc); doc = new Document(); - doc.add(new LatLonPoint("location", 40.7051157, -74.0088305)); + doc.add(new LatLonDocValuesField("location", 40.7051157, -74.0088305)); iw.addDocument(doc); IndexReader reader = iw.getReader(); IndexSearcher searcher = newSearcher(reader); iw.close(); - Sort sort = new Sort(LatLonPoint.newDistanceSort("location", 40.7143528, -74.0059731)); + Sort sort = new Sort(LatLonDocValuesField.newDistanceSort("location", 40.7143528, -74.0059731)); TopDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort); FieldDoc d = (FieldDoc) td.scoreDocs[0]; @@ -199,7 +199,7 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { double lat = decodeLatitude(encodeLatitude(latRaw)); double lon = decodeLongitude(encodeLongitude(lonRaw)); - doc.add(new LatLonPoint("field", lat, lon)); + doc.add(new LatLonDocValuesField("field", lat, lon)); doc.add(new StoredField("lat", lat)); doc.add(new StoredField("lon", lon)); } // otherwise "missing" @@ -234,7 +234,7 @@ public class TestLatLonPointDistanceSort extends LuceneTestCase { // randomize the topN a bit int topN = TestUtil.nextInt(random(), 1, reader.maxDoc()); // sort by distance, then ID - SortField distanceSort = LatLonPoint.newDistanceSort("field", lat, lon); + SortField distanceSort = LatLonDocValuesField.newDistanceSort("field", lat, lon); distanceSort.setMissingValue(missingValue); Sort sort = new Sort(distanceSort, new SortField("id", SortField.Type.INT)); diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java index fc073c7b572..66630df2bca 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java +++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestNearest.java @@ -166,6 +166,7 @@ public class TestNearest extends LuceneTestCase { lons[id] = quantizeLon(GeoTestUtil.nextLongitude()); Document doc = new Document(); doc.add(new LatLonPoint("point", lats[id], lons[id])); + doc.add(new LatLonDocValuesField("point", lats[id], lons[id])); doc.add(new StoredField("id", id)); w.addDocument(doc); } @@ -216,7 +217,7 @@ public class TestNearest extends LuceneTestCase { } // Also test with MatchAllDocsQuery, sorting by distance: - TopFieldDocs fieldDocs = s.search(new MatchAllDocsQuery(), topN, new Sort(LatLonPoint.newDistanceSort("point", pointLat, pointLon))); + TopFieldDocs fieldDocs = s.search(new MatchAllDocsQuery(), topN, new Sort(LatLonDocValuesField.newDistanceSort("point", pointLat, pointLon))); ScoreDoc[] hits = LatLonPoint.nearest(s, "point", pointLat, pointLon, topN).scoreDocs; for(int i=0;i