LUCENE-5258: add distance function to expressions/

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1529679 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-10-06 20:47:32 +00:00
parent 9e9324b81e
commit 9c98f9d958
6 changed files with 367 additions and 0 deletions

View File

@ -0,0 +1,229 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* some code derived from jodk: http://code.google.com/p/jodk/ (apache 2.0)
* asin() derived from fdlibm: http://www.netlib.org/fdlibm/e_asin.c (public domain):
* =============================================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* =============================================================================
*/
/** Math functions that trade off accuracy for speed. */
public class SloppyMath {
/**
* Returns the distance between two points in decimal degrees.
* @param lat1 Latitude of the first point.
* @param lon1 Longitude of the first point.
* @param lat2 Latitude of the second point.
* @param lon2 Longitude of the second point.
* @return distance in kilometers.
*/
public static double haversin(double lat1, double lon1, double lat2, double lon2) {
double x1 = lat1 * TO_RADIANS;
double x2 = lat2 * TO_RADIANS;
double h1 = (1 - cos(x1 - x2)) / 2;
double h2 = (1 - cos((lon1 - lon2) * TO_RADIANS)) / 2;
double h = h1 + cos(x1) * cos(x2) * h2;
return TO_KILOMETERS * 2 * asin(Math.min(1, Math.sqrt(h)));
}
/**
* Returns the trigonometric cosine of an angle.
* <p>
* Error is around 1E-15.
* <p>
* Special cases:
* <ul>
* <li>If the argument is {@code NaN} or an infinity, then the result is {@code NaN}.
* </ul>
* @param a an angle, in radians.
* @return the cosine of the argument.
* @see Math#cos(double)
*/
public static double cos(double a) {
if (a < 0.0) {
a = -a;
}
if (a > SIN_COS_MAX_VALUE_FOR_INT_MODULO) {
return Math.cos(a);
}
// index: possibly outside tables range.
int index = (int)(a * SIN_COS_INDEXER + 0.5);
double delta = (a - index * SIN_COS_DELTA_HI) - index * SIN_COS_DELTA_LO;
// Making sure index is within tables range.
// Last value of each table is the same than first, so we ignore it (tabs size minus one) for modulo.
index &= (SIN_COS_TABS_SIZE-2); // index % (SIN_COS_TABS_SIZE-1)
double indexCos = cosTab[index];
double indexSin = sinTab[index];
return indexCos + delta * (-indexSin + delta * (-indexCos * ONE_DIV_F2 + delta * (indexSin * ONE_DIV_F3 + delta * indexCos * ONE_DIV_F4)));
}
/**
* Returns the arc sine of a value.
* <p>
* The returned angle is in the range <i>-pi</i>/2 through <i>pi</i>/2.
* Error is around 1E-7.
* <p>
* Special cases:
* <ul>
* <li>If the argument is {@code NaN} or its absolute value is greater than 1, then the result is {@code NaN}.
* </ul>
* @param a the value whose arc sine is to be returned.
* @return arc sine of the argument
* @see Math#asin(double)
*/
// because asin(-x) = -asin(x), asin(x) only needs to be computed on [0,1].
// ---> we only have to compute asin(x) on [0,1].
// For values not close to +-1, we use look-up tables;
// for values near +-1, we use code derived from fdlibm.
public static double asin(double a) {
boolean negateResult;
if (a < 0.0) {
a = -a;
negateResult = true;
} else {
negateResult = false;
}
if (a <= ASIN_MAX_VALUE_FOR_TABS) {
int index = (int)(a * ASIN_INDEXER + 0.5);
double delta = a - index * ASIN_DELTA;
double result = asinTab[index] + delta * (asinDer1DivF1Tab[index] + delta * (asinDer2DivF2Tab[index] + delta * (asinDer3DivF3Tab[index] + delta * asinDer4DivF4Tab[index])));
return negateResult ? -result : result;
} else { // value > ASIN_MAX_VALUE_FOR_TABS, or value is NaN
// This part is derived from fdlibm.
if (a < 1.0) {
double t = (1.0 - a)*0.5;
double p = t*(ASIN_PS0+t*(ASIN_PS1+t*(ASIN_PS2+t*(ASIN_PS3+t*(ASIN_PS4+t*ASIN_PS5)))));
double q = 1.0+t*(ASIN_QS1+t*(ASIN_QS2+t*(ASIN_QS3+t*ASIN_QS4)));
double s = Math.sqrt(t);
double z = s+s*(p/q);
double result = ASIN_PIO2_HI-((z+z)-ASIN_PIO2_LO);
return negateResult ? -result : result;
} else { // value >= 1.0, or value is NaN
if (a == 1.0) {
return negateResult ? -Math.PI/2 : Math.PI/2;
} else {
return Double.NaN;
}
}
}
}
// haversin
private static final double TO_RADIANS = Math.PI / 180D;
private static final double TO_KILOMETERS = 6371.0087714D;
// cos/asin
private static final double ONE_DIV_F2 = 1/2.0;
private static final double ONE_DIV_F3 = 1/6.0;
private static final double ONE_DIV_F4 = 1/24.0;
private static final double PIO2_HI = Double.longBitsToDouble(0x3FF921FB54400000L); // 1.57079632673412561417e+00 first 33 bits of pi/2
private static final double PIO2_LO = Double.longBitsToDouble(0x3DD0B4611A626331L); // 6.07710050650619224932e-11 pi/2 - PIO2_HI
private static final double TWOPI_HI = 4*PIO2_HI;
private static final double TWOPI_LO = 4*PIO2_LO;
private static final int SIN_COS_TABS_SIZE = (1<<11) + 1;
private static final double SIN_COS_DELTA_HI = TWOPI_HI/(SIN_COS_TABS_SIZE-1);
private static final double SIN_COS_DELTA_LO = TWOPI_LO/(SIN_COS_TABS_SIZE-1);
private static final double SIN_COS_INDEXER = 1/(SIN_COS_DELTA_HI+SIN_COS_DELTA_LO);
private static final double[] sinTab = new double[SIN_COS_TABS_SIZE];
private static final double[] cosTab = new double[SIN_COS_TABS_SIZE];
// Max abs value for fast modulo, above which we use regular angle normalization.
// This value must be < (Integer.MAX_VALUE / SIN_COS_INDEXER), to stay in range of int type.
// The higher it is, the higher the error, but also the faster it is for lower values.
// If you set it to ((Integer.MAX_VALUE / SIN_COS_INDEXER) * 0.99), worse accuracy on double range is about 1e-10.
static final double SIN_COS_MAX_VALUE_FOR_INT_MODULO = ((Integer.MAX_VALUE>>9) / SIN_COS_INDEXER) * 0.99;
// Supposed to be >= sin(77.2deg), as fdlibm code is supposed to work with values > 0.975,
// but seems to work well enough as long as value >= sin(25deg).
private static final double ASIN_MAX_VALUE_FOR_TABS = StrictMath.sin(Math.toRadians(73.0));
private static final int ASIN_TABS_SIZE = (1<<13) + 1;
private static final double ASIN_DELTA = ASIN_MAX_VALUE_FOR_TABS/(ASIN_TABS_SIZE - 1);
private static final double ASIN_INDEXER = 1/ASIN_DELTA;
private static final double[] asinTab = new double[ASIN_TABS_SIZE];
private static final double[] asinDer1DivF1Tab = new double[ASIN_TABS_SIZE];
private static final double[] asinDer2DivF2Tab = new double[ASIN_TABS_SIZE];
private static final double[] asinDer3DivF3Tab = new double[ASIN_TABS_SIZE];
private static final double[] asinDer4DivF4Tab = new double[ASIN_TABS_SIZE];
private static final double ASIN_PIO2_HI = Double.longBitsToDouble(0x3FF921FB54442D18L); // 1.57079632679489655800e+00
private static final double ASIN_PIO2_LO = Double.longBitsToDouble(0x3C91A62633145C07L); // 6.12323399573676603587e-17
private static final double ASIN_PS0 = Double.longBitsToDouble(0x3fc5555555555555L); // 1.66666666666666657415e-01
private static final double ASIN_PS1 = Double.longBitsToDouble(0xbfd4d61203eb6f7dL); // -3.25565818622400915405e-01
private static final double ASIN_PS2 = Double.longBitsToDouble(0x3fc9c1550e884455L); // 2.01212532134862925881e-01
private static final double ASIN_PS3 = Double.longBitsToDouble(0xbfa48228b5688f3bL); // -4.00555345006794114027e-02
private static final double ASIN_PS4 = Double.longBitsToDouble(0x3f49efe07501b288L); // 7.91534994289814532176e-04
private static final double ASIN_PS5 = Double.longBitsToDouble(0x3f023de10dfdf709L); // 3.47933107596021167570e-05
private static final double ASIN_QS1 = Double.longBitsToDouble(0xc0033a271c8a2d4bL); // -2.40339491173441421878e+00
private static final double ASIN_QS2 = Double.longBitsToDouble(0x40002ae59c598ac8L); // 2.02094576023350569471e+00
private static final double ASIN_QS3 = Double.longBitsToDouble(0xbfe6066c1b8d0159L); // -6.88283971605453293030e-01
private static final double ASIN_QS4 = Double.longBitsToDouble(0x3fb3b8c5b12e9282L); // 7.70381505559019352791e-02
/** Initializes look-up tables. */
static {
// sin and cos
final int SIN_COS_PI_INDEX = (SIN_COS_TABS_SIZE-1)/2;
final int SIN_COS_PI_MUL_2_INDEX = 2*SIN_COS_PI_INDEX;
final int SIN_COS_PI_MUL_0_5_INDEX = SIN_COS_PI_INDEX/2;
final int SIN_COS_PI_MUL_1_5_INDEX = 3*SIN_COS_PI_INDEX/2;
for (int i=0;i<SIN_COS_TABS_SIZE;i++) {
// angle: in [0,2*PI].
double angle = i * SIN_COS_DELTA_HI + i * SIN_COS_DELTA_LO;
double sinAngle = StrictMath.sin(angle);
double cosAngle = StrictMath.cos(angle);
// For indexes corresponding to null cosine or sine, we make sure the value is zero
// and not an epsilon. This allows for a much better accuracy for results close to zero.
if (i == SIN_COS_PI_INDEX) {
sinAngle = 0.0;
} else if (i == SIN_COS_PI_MUL_2_INDEX) {
sinAngle = 0.0;
} else if (i == SIN_COS_PI_MUL_0_5_INDEX) {
cosAngle = 0.0;
} else if (i == SIN_COS_PI_MUL_1_5_INDEX) {
cosAngle = 0.0;
}
sinTab[i] = sinAngle;
cosTab[i] = cosAngle;
}
// asin
for (int i=0;i<ASIN_TABS_SIZE;i++) {
// x: in [0,ASIN_MAX_VALUE_FOR_TABS].
double x = i * ASIN_DELTA;
asinTab[i] = StrictMath.asin(x);
double oneMinusXSqInv = 1.0/(1-x*x);
double oneMinusXSqInv0_5 = StrictMath.sqrt(oneMinusXSqInv);
double oneMinusXSqInv1_5 = oneMinusXSqInv0_5*oneMinusXSqInv;
double oneMinusXSqInv2_5 = oneMinusXSqInv1_5*oneMinusXSqInv;
double oneMinusXSqInv3_5 = oneMinusXSqInv2_5*oneMinusXSqInv;
asinDer1DivF1Tab[i] = oneMinusXSqInv0_5;
asinDer2DivF2Tab[i] = (x*oneMinusXSqInv1_5) * ONE_DIV_F2;
asinDer3DivF3Tab[i] = ((1+2*x*x)*oneMinusXSqInv2_5) * ONE_DIV_F3;
asinDer4DivF4Tab[i] = ((5+2*x*(2+x*(5-2*x)))*oneMinusXSqInv3_5) * ONE_DIV_F4;
}
}
}

View File

@ -0,0 +1,106 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.apache.lucene.util.SloppyMath.cos;
import static org.apache.lucene.util.SloppyMath.asin;
import static org.apache.lucene.util.SloppyMath.haversin;
public class TestSloppyMath extends LuceneTestCase {
// accuracy for cos()
static double COS_DELTA = 1E-15;
// accuracy for asin()
static double ASIN_DELTA = 1E-7;
public void testCos() {
assertTrue(Double.isNaN(cos(Double.NaN)));
assertTrue(Double.isNaN(cos(Double.NEGATIVE_INFINITY)));
assertTrue(Double.isNaN(cos(Double.POSITIVE_INFINITY)));
assertEquals(StrictMath.cos(1), cos(1), COS_DELTA);
assertEquals(StrictMath.cos(0), cos(0), COS_DELTA);
assertEquals(StrictMath.cos(Math.PI/2), cos(Math.PI/2), COS_DELTA);
assertEquals(StrictMath.cos(-Math.PI/2), cos(-Math.PI/2), COS_DELTA);
assertEquals(StrictMath.cos(Math.PI/4), cos(Math.PI/4), COS_DELTA);
assertEquals(StrictMath.cos(-Math.PI/4), cos(-Math.PI/4), COS_DELTA);
assertEquals(StrictMath.cos(Math.PI*2/3), cos(Math.PI*2/3), COS_DELTA);
assertEquals(StrictMath.cos(-Math.PI*2/3), cos(-Math.PI*2/3), COS_DELTA);
assertEquals(StrictMath.cos(Math.PI/6), cos(Math.PI/6), COS_DELTA);
assertEquals(StrictMath.cos(-Math.PI/6), cos(-Math.PI/6), COS_DELTA);
// testing purely random longs is inefficent, as for stupid parameters we just
// pass thru to Math.cos() instead of doing some huperduper arg reduction
for (int i = 0; i < 10000; i++) {
double d = random().nextDouble() * SloppyMath.SIN_COS_MAX_VALUE_FOR_INT_MODULO;
if (random().nextBoolean()) {
d = -d;
}
assertEquals(StrictMath.cos(d), cos(d), COS_DELTA);
}
}
public void testAsin() {
assertTrue(Double.isNaN(asin(Double.NaN)));
assertTrue(Double.isNaN(asin(2)));
assertTrue(Double.isNaN(asin(-2)));
assertEquals(-Math.PI/2, asin(-1), ASIN_DELTA);
assertEquals(-Math.PI/3, asin(-0.8660254), ASIN_DELTA);
assertEquals(-Math.PI/4, asin(-0.7071068), ASIN_DELTA);
assertEquals(-Math.PI/6, asin(-0.5), ASIN_DELTA);
assertEquals(0, asin(0), ASIN_DELTA);
assertEquals(Math.PI/6, asin(0.5), ASIN_DELTA);
assertEquals(Math.PI/4, asin(0.7071068), ASIN_DELTA);
assertEquals(Math.PI/3, asin(0.8660254), ASIN_DELTA);
assertEquals(Math.PI/2, asin(1), ASIN_DELTA);
// only values -1..1 are useful
for (int i = 0; i < 10000; i++) {
double d = random().nextDouble();
if (random().nextBoolean()) {
d = -d;
}
assertEquals(StrictMath.asin(d), asin(d), ASIN_DELTA);
assertTrue(asin(d) >= -Math.PI/2);
assertTrue(asin(d) <= Math.PI/2);
}
}
public void testHaversin() {
assertTrue(Double.isNaN(haversin(1, 1, 1, Double.NaN)));
assertTrue(Double.isNaN(haversin(1, 1, Double.NaN, 1)));
assertTrue(Double.isNaN(haversin(1, Double.NaN, 1, 1)));
assertTrue(Double.isNaN(haversin(Double.NaN, 1, 1, 1)));
assertEquals(0, haversin(0, 0, 0, 0), 0D);
assertEquals(0, haversin(0, -180, 0, -180), 0D);
assertEquals(0, haversin(0, -180, 0, 180), 0D);
assertEquals(0, haversin(0, 180, 0, 180), 0D);
assertEquals(0, haversin(90, 0, 90, 0), 0D);
assertEquals(0, haversin(90, -180, 90, -180), 0D);
assertEquals(0, haversin(90, -180, 90, 180), 0D);
assertEquals(0, haversin(90, 180, 90, 180), 0D);
// from solr and ES tests (with their respective epsilons)
assertEquals(314.40338, haversin(1, 2, 3, 4), 10e-5);
assertEquals(0, haversin(40.7143528, -74.0059731, 40.7143528, -74.0059731), 0D);
assertEquals(5.286, haversin(40.7143528, -74.0059731, 40.759011, -73.9844722), 0.01D);
assertEquals(0.4621, haversin(40.7143528, -74.0059731, 40.718266, -74.007819), 0.01D);
assertEquals(1.055, haversin(40.7143528, -74.0059731, 40.7051157, -74.0088305), 0.01D);
assertEquals(1.258, haversin(40.7143528, -74.0059731, 40.7247222, -74), 0.01D);
assertEquals(2.029, haversin(40.7143528, -74.0059731, 40.731033, -73.9962255), 0.01D);
assertEquals(8.572, haversin(40.7143528, -74.0059731, 40.65, -73.95), 0.01D);
}
}

View File

@ -29,6 +29,7 @@
<li>Comparison operators: <code>&lt; &lt;= == &gt;= &gt;</code></li>
<li>Common mathematic functions: <code>abs ceil exp floor ln log2 log10 logn max min sqrt pow</code></li>
<li>Trigonometric library functions: <code>acosh acos asinh asin atanh atan atan2 cosh cos sinh sin tanh tan</code></li>
<li>Distance functions: <code>haversin</code></li>
<li>Miscellaneous functions: <code>min, max</code></li>
<li>Arbitrary external variables - see {@link org.apache.lucene.expressions.Bindings}</li>
</ul>

View File

@ -31,6 +31,7 @@ cos = java.lang.Math, cos, 1
cosh = java.lang.Math, cosh, 1
exp = java.lang.Math, exp, 1
floor = java.lang.Math, floor, 1
haversin = org.apache.lucene.util.SloppyMath, haversin, 4
ln = java.lang.Math, log, 1
log10 = java.lang.Math, log10, 1
logn = org.apache.lucene.util.MathUtil, log, 2

View File

@ -1,6 +1,7 @@
package org.apache.lucene.expressions;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.expressions.js.JavascriptCompiler;
@ -10,6 +11,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
@ -51,18 +53,24 @@ public class TestDemoExpressions extends LuceneTestCase {
doc.add(newStringField("id", "1", Field.Store.YES));
doc.add(newTextField("body", "some contents and more contents", Field.Store.NO));
doc.add(new NumericDocValuesField("popularity", 5));
doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "2", Field.Store.YES));
doc.add(newTextField("body", "another document with different contents", Field.Store.NO));
doc.add(new NumericDocValuesField("popularity", 20));
doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO));
doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(newStringField("id", "3", Field.Store.YES));
doc.add(newTextField("body", "crappy contents", Field.Store.NO));
doc.add(new NumericDocValuesField("popularity", 2));
doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO));
doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO));
iw.addDocument(doc);
reader = iw.getReader();
@ -180,4 +188,22 @@ public class TestDemoExpressions extends LuceneTestCase {
assertEquals(expected, actual, CheckHits.explainToleranceDelta(expected, actual));
}
}
public void testDistanceSort() throws Exception {
Expression distance = JavascriptCompiler.compile("haversin(40.7143528,-74.0059731,latitude,longitude)");
SimpleBindings bindings = new SimpleBindings();
bindings.add(new SortField("latitude", SortField.Type.DOUBLE));
bindings.add(new SortField("longitude", SortField.Type.DOUBLE));
Sort sort = new Sort(distance.getSortField(bindings, false));
TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), null, 3, sort);
FieldDoc d = (FieldDoc) td.scoreDocs[0];
assertEquals(0.4621D, (Double)d.fields[0], 1E-4);
d = (FieldDoc) td.scoreDocs[1];
assertEquals(1.0550D, (Double)d.fields[0], 1E-4);
d = (FieldDoc) td.scoreDocs[2];
assertEquals(5.2859D, (Double)d.fields[0], 1E-4);
}
}

View File

@ -157,6 +157,10 @@ public class TestJavascriptFunction extends LuceneTestCase {
assertEvaluatesTo("floor(-1.1)", -2);
}
public void testHaversinMethod() throws Exception {
assertEvaluatesTo("haversin(40.7143528,-74.0059731,40.759011,-73.9844722)", 5.285885589128);
}
public void testLnMethod() throws Exception {
assertEvaluatesTo("ln(0)", Double.NEGATIVE_INFINITY);
assertEvaluatesTo("ln(" + Math.E + ")", 1);