LUCENE-8638: Expressions haversin() method should continue to return its value in km (#299)

SloppyMath had a deprecated haversin() function that returned its values in
km, which has been replaced by a haversinMeters() function that is explicit
about its units. As part of removing this function, we changed the expressions
module haversin function to point instead to haversinMeters. However, this
may silently change the behaviour of expressions on upgrade.

This commit instead adds a haversinKilometers method to the expressions
module and maps the haversin function to it. It also adds a new
haversinMeters expression function to be more explicit for future users.
This commit is contained in:
Alan Woodward 2021-09-14 14:01:10 +01:00 committed by GitHub
parent 3802bdc686
commit 26093735cc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 144 additions and 11 deletions

View File

@ -31,9 +31,6 @@ API Changes
* LUCENE-8638: Remove many deprecated methods and classes including FST.lookupByOutput(),
LegacyBM25Similarity and Jaspell suggester.
* LUCENE-8638: haversin() expressions function now returns its result in meters rather than
kilometers.
* LUCENE-8982: Separate out native code to another module to allow cpp
build with gradle. This also changes the name of the native "posix-support"
library to LuceneNativeIO. (Zachary Chen, Dawid Weiss)

View File

@ -54,10 +54,10 @@ import org.apache.lucene.store.Directory;
*/
public class DistanceFacetsExample implements Closeable {
final DoubleRange ONE_KM = new DoubleRange("< 1 km", 0.0, true, 1000.0, false);
final DoubleRange TWO_KM = new DoubleRange("< 2 km", 0.0, true, 2000.0, false);
final DoubleRange FIVE_KM = new DoubleRange("< 5 km", 0.0, true, 5000.0, false);
final DoubleRange TEN_KM = new DoubleRange("< 10 km", 0.0, true, 10000.0, false);
final DoubleRange ONE_KM = new DoubleRange("< 1 km", 0.0, true, 1.0, false);
final DoubleRange TWO_KM = new DoubleRange("< 2 km", 0.0, true, 2.0, false);
final DoubleRange FIVE_KM = new DoubleRange("< 5 km", 0.0, true, 5.0, false);
final DoubleRange TEN_KM = new DoubleRange("< 10 km", 0.0, true, 10.0, false);
private final Directory indexDir = new ByteBuffersDirectory();
private IndexSearcher searcher;

View File

@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.expressions.js;
import static org.apache.lucene.util.SloppyMath.haversinMeters;
import static org.apache.lucene.util.SloppyMath.haversinSortKey;
/**
 * Static math helpers backing functions that are exposed to javascript expressions.
 *
 * <p>This class only holds static methods and cannot be instantiated.
 */
public final class ExpressionMath {

  /** Factor that converts a distance expressed in meters into kilometers. */
  private static final double METERS_TO_KILOMETERS = 1D / 1000;

  private ExpressionMath() {}

  /**
   * Computes the Haversine distance, in kilometers, between two points given in decimal degrees
   * (latitude/longitude). The result is correct even when the dateline lies between the points.
   *
   * <p>The result deviates from the actual haversine distance by at most 40cm, and typically by
   * far less for reasonable distances: around 0.01mm for distances less than 1000km.
   *
   * @param lat1 Latitude of the first point.
   * @param lon1 Longitude of the first point.
   * @param lat2 Latitude of the second point.
   * @param lon2 Longitude of the second point.
   * @return distance in kilometers.
   */
  public static double haversinKilometers(double lat1, double lon1, double lat2, double lon2) {
    final double sortKey = haversinSortKey(lat1, lon1, lat2, lon2);
    final double distanceInMeters = haversinMeters(sortKey);
    // Multiply by the precomputed factor (rather than dividing by 1000) so that results stay
    // bit-for-bit stable.
    return distanceInMeters * METERS_TO_KILOMETERS;
  }
}

View File

@ -30,7 +30,7 @@
* <li>Common mathematic functions: <code>abs ceil exp floor ln log10 logn max min sqrt pow</code>
* <li>Trigonometric library functions: <code>
* acosh acos asinh asin atanh atan atan2 cosh cos sinh sin tanh tan</code>
* <li>Distance functions: <code>haversin</code>
* <li>Distance functions: <code>haversin</code> <code>haversinMeters</code>
* <li>Miscellaneous functions: <code>min, max</code>
* <li>Arbitrary external variables - see {@link org.apache.lucene.expressions.Bindings}
* </ul>

View File

@ -31,7 +31,8 @@ cos = java.lang.Math, cos, 1
cosh = java.lang.Math, cosh, 1
exp = java.lang.Math, exp, 1
floor = java.lang.Math, floor, 1
haversin = org.apache.lucene.util.SloppyMath, haversinMeters, 4
haversin = org.apache.lucene.expressions.js.ExpressionMath, haversinKilometers, 4
haversinMeters = org.apache.lucene.util.SloppyMath, haversinMeters, 4
ln = java.lang.Math, log, 1
log10 = java.lang.Math, log10, 1
logn = org.apache.lucene.util.MathUtil, log, 2

View File

@ -220,11 +220,30 @@ public class TestDemoExpressions extends LuceneTestCase {
Sort sort = new Sort(distance.getSortField(bindings, false));
TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
FieldDoc d = (FieldDoc) td.scoreDocs[0];
assertEquals(0.4621D, (Double) d.fields[0], 1E-1);
d = (FieldDoc) td.scoreDocs[1];
assertEquals(1.055, (Double) d.fields[0], 1E-1);
d = (FieldDoc) td.scoreDocs[2];
assertEquals(5.2859D, (Double) d.fields[0], 1E-1);
}
public void testHaversinMetersDistanceSort() throws Exception {
Expression distance =
JavascriptCompiler.compile("haversinMeters(40.7143528,-74.0059731,latitude,longitude)");
SimpleBindings bindings = new SimpleBindings();
bindings.add("latitude", DoubleValuesSource.fromDoubleField("latitude"));
bindings.add("longitude", DoubleValuesSource.fromDoubleField("longitude"));
Sort sort = new Sort(distance.getSortField(bindings, false));
TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 3, sort);
FieldDoc d = (FieldDoc) td.scoreDocs[0];
assertEquals(462.1D, (Double) d.fields[0], 1E-1);
d = (FieldDoc) td.scoreDocs[1];
assertEquals(1055D, (Double) d.fields[0], 1E-1);
assertEquals(1055, (Double) d.fields[0], 1E-1);
d = (FieldDoc) td.scoreDocs[2];
assertEquals(5285.9D, (Double) d.fields[0], 1E-1);

View File

@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.expressions.js;
import static org.apache.lucene.expressions.js.ExpressionMath.haversinKilometers;
import java.util.Random;
import org.apache.lucene.util.LuceneTestCase;
/** Tests for the static helpers in {@link ExpressionMath}. */
public class TestExpressionMath extends LuceneTestCase {

  /**
   * Checks {@code haversinKilometers} for NaN propagation, zero distances between coincident
   * points, half a circle along the equator, and a set of known reference distances.
   */
  public void testHaversin() {
    // A NaN in any one of the four coordinates must produce NaN.
    final double nan = Double.NaN;
    assertTrue(Double.isNaN(haversinKilometers(1, 1, 1, nan)));
    assertTrue(Double.isNaN(haversinKilometers(1, 1, nan, 1)));
    assertTrue(Double.isNaN(haversinKilometers(1, nan, 1, 1)));
    assertTrue(Double.isNaN(haversinKilometers(nan, 1, 1, 1)));

    // Coincident points (including -180/180 longitude pairs) are exactly 0 apart.
    final double[][] coincidentPoints = {
      {0, 0, 0, 0},
      {0, -180, 0, -180},
      {0, -180, 0, 180},
      {0, 180, 0, 180},
      {90, 0, 90, 0},
      {90, -180, 90, -180},
      {90, -180, 90, 180},
      {90, 180, 90, 180},
    };
    for (double[] p : coincidentPoints) {
      assertEquals(0D, haversinKilometers(p[0], p[1], p[2], p[3]), 0D);
    }

    // Test half a circle on the equator, using WGS84 mean earth radius in meters
    final double earthRadiusMeters = 6_371_008.7714;
    final double halfCircleKm = earthRadiusMeters * Math.PI / 1000;
    assertEquals(halfCircleKm, haversinKilometers(0, 0, 0, 180), 0D);

    // Each coordinate is shifted by a random whole number of 360-degree turns; the expected
    // distance is the same as for the unshifted points (asserted again at the end).
    final Random rnd = random();
    final double shiftedLat1 = offsetByWholeTurns(rnd, 40.7143528);
    final double shiftedLon1 = offsetByWholeTurns(rnd, -74.0059731);
    final double shiftedLat2 = offsetByWholeTurns(rnd, 40.65);
    final double shiftedLon2 = offsetByWholeTurns(rnd, -73.95);
    assertEquals(
        8.5721137, haversinKilometers(shiftedLat1, shiftedLon1, shiftedLat2, shiftedLon2), 0.01D);

    // from solr and ES tests (with their respective epsilons)
    final double originLat = 40.7143528;
    final double originLon = -74.0059731;
    assertEquals(0D, haversinKilometers(originLat, originLon, originLat, originLon), 0D);

    // Rows are {expected kilometers, destination latitude, destination longitude}.
    final double[][] referenceDistances = {
      {5.28589, 40.759011, -73.9844722},
      {0.46210, 40.718266, -74.007819},
      {1.05498, 40.7051157, -74.0088305},
      {1.25812, 40.7247222, -74},
      {2.02852, 40.731033, -73.9962255},
      {8.57211, 40.65, -73.95},
    };
    for (double[] row : referenceDistances) {
      assertEquals(row[0], haversinKilometers(originLat, originLon, row[1], row[2]), 0.01D);
    }
  }

  /** Adds a random multiple of 360 (between -5 and 4 turns, inclusive) to {@code degrees}. */
  private static double offsetByWholeTurns(Random rnd, double degrees) {
    return degrees + (rnd.nextInt(10) - 5) * 360;
  }
}

View File

@ -158,7 +158,7 @@ public class TestJavascriptFunction extends LuceneTestCase {
}
public void testHaversinMethod() throws Exception {
assertEvaluatesTo("haversin(40.7143528,-74.0059731,40.759011,-73.9844722)", 5285.885589128259);
assertEvaluatesTo("haversin(40.7143528,-74.0059731,40.759011,-73.9844722)", 5.285885589128259);
}
public void testLnMethod() throws Exception {