From 9c98f9d95801fe6e64f7653667feb30ed80b9b8e Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 6 Oct 2013 20:47:32 +0000 Subject: [PATCH] LUCENE-5258: add distance function to expressions/ git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1529679 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/util/SloppyMath.java | 229 ++++++++++++++++++ .../apache/lucene/util/TestSloppyMath.java | 106 ++++++++ .../apache/lucene/expressions/js/package.html | 1 + .../js/JavascriptCompiler.properties | 1 + .../expressions/TestDemoExpressions.java | 26 ++ .../js/TestJavascriptFunction.java | 4 + 6 files changed, 367 insertions(+) create mode 100644 lucene/core/src/java/org/apache/lucene/util/SloppyMath.java create mode 100644 lucene/core/src/test/org/apache/lucene/util/TestSloppyMath.java diff --git a/lucene/core/src/java/org/apache/lucene/util/SloppyMath.java b/lucene/core/src/java/org/apache/lucene/util/SloppyMath.java new file mode 100644 index 00000000000..d27c8b3677b --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/SloppyMath.java @@ -0,0 +1,229 @@ +package org.apache.lucene.util; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* some code derived from jodk: http://code.google.com/p/jodk/ (apache 2.0) + * asin() derived from fdlibm: http://www.netlib.org/fdlibm/e_asin.c (public domain): + * ============================================================================= + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ============================================================================= + */ + +/** Math functions that trade off accuracy for speed. */ +public class SloppyMath { + + /** + * Returns the distance between two points in decimal degrees. + * @param lat1 Latitude of the first point. + * @param lon1 Longitude of the first point. + * @param lat2 Latitude of the second point. + * @param lon2 Longitude of the second point. + * @return distance in kilometers. + */ + public static double haversin(double lat1, double lon1, double lat2, double lon2) { + double x1 = lat1 * TO_RADIANS; + double x2 = lat2 * TO_RADIANS; + double h1 = (1 - cos(x1 - x2)) / 2; + double h2 = (1 - cos((lon1 - lon2) * TO_RADIANS)) / 2; + double h = h1 + cos(x1) * cos(x2) * h2; + return TO_KILOMETERS * 2 * asin(Math.min(1, Math.sqrt(h))); + } + + /** + * Returns the trigonometric cosine of an angle. + *

+ * Error is around 1E-15. + *

+ * Special cases: + *

+ * @param a an angle, in radians. + * @return the cosine of the argument. + * @see Math#cos(double) + */ + public static double cos(double a) { + if (a < 0.0) { + a = -a; + } + if (a > SIN_COS_MAX_VALUE_FOR_INT_MODULO) { + return Math.cos(a); + } + // index: possibly outside tables range. + int index = (int)(a * SIN_COS_INDEXER + 0.5); + double delta = (a - index * SIN_COS_DELTA_HI) - index * SIN_COS_DELTA_LO; + // Making sure index is within tables range. + // Last value of each table is the same than first, so we ignore it (tabs size minus one) for modulo. + index &= (SIN_COS_TABS_SIZE-2); // index % (SIN_COS_TABS_SIZE-1) + double indexCos = cosTab[index]; + double indexSin = sinTab[index]; + return indexCos + delta * (-indexSin + delta * (-indexCos * ONE_DIV_F2 + delta * (indexSin * ONE_DIV_F3 + delta * indexCos * ONE_DIV_F4))); + } + + /** + * Returns the arc sine of a value. + *

+ * The returned angle is in the range -pi/2 through pi/2. + * Error is around 1E-7. + *

+ * Special cases: + *

+ * @param a the value whose arc sine is to be returned. + * @return arc sine of the argument + * @see Math#asin(double) + */ + // because asin(-x) = -asin(x), asin(x) only needs to be computed on [0,1]. + // ---> we only have to compute asin(x) on [0,1]. + // For values not close to +-1, we use look-up tables; + // for values near +-1, we use code derived from fdlibm. + public static double asin(double a) { + boolean negateResult; + if (a < 0.0) { + a = -a; + negateResult = true; + } else { + negateResult = false; + } + if (a <= ASIN_MAX_VALUE_FOR_TABS) { + int index = (int)(a * ASIN_INDEXER + 0.5); + double delta = a - index * ASIN_DELTA; + double result = asinTab[index] + delta * (asinDer1DivF1Tab[index] + delta * (asinDer2DivF2Tab[index] + delta * (asinDer3DivF3Tab[index] + delta * asinDer4DivF4Tab[index]))); + return negateResult ? -result : result; + } else { // value > ASIN_MAX_VALUE_FOR_TABS, or value is NaN + // This part is derived from fdlibm. + if (a < 1.0) { + double t = (1.0 - a)*0.5; + double p = t*(ASIN_PS0+t*(ASIN_PS1+t*(ASIN_PS2+t*(ASIN_PS3+t*(ASIN_PS4+t*ASIN_PS5))))); + double q = 1.0+t*(ASIN_QS1+t*(ASIN_QS2+t*(ASIN_QS3+t*ASIN_QS4))); + double s = Math.sqrt(t); + double z = s+s*(p/q); + double result = ASIN_PIO2_HI-((z+z)-ASIN_PIO2_LO); + return negateResult ? -result : result; + } else { // value >= 1.0, or value is NaN + if (a == 1.0) { + return negateResult ? -Math.PI/2 : Math.PI/2; + } else { + return Double.NaN; + } + } + } + } + + // haversin + private static final double TO_RADIANS = Math.PI / 180D; + private static final double TO_KILOMETERS = 6371.0087714D; + + // cos/asin + private static final double ONE_DIV_F2 = 1/2.0; + private static final double ONE_DIV_F3 = 1/6.0; + private static final double ONE_DIV_F4 = 1/24.0; + + private static final double PIO2_HI = Double.longBitsToDouble(0x3FF921FB54400000L); // 1.57079632673412561417e+00 first 33 bits of pi/2 + private static final double PIO2_LO = Double.longBitsToDouble(0x3DD0B4611A626331L); // 6.07710050650619224932e-11 pi/2 - PIO2_HI + private static final double TWOPI_HI = 4*PIO2_HI; + private static final double TWOPI_LO = 4*PIO2_LO; + private static final int SIN_COS_TABS_SIZE = (1<<11) + 1; + private static final double SIN_COS_DELTA_HI = TWOPI_HI/(SIN_COS_TABS_SIZE-1); + private static final double SIN_COS_DELTA_LO = TWOPI_LO/(SIN_COS_TABS_SIZE-1); + private static final double SIN_COS_INDEXER = 1/(SIN_COS_DELTA_HI+SIN_COS_DELTA_LO); + private static final double[] sinTab = new double[SIN_COS_TABS_SIZE]; + private static final double[] cosTab = new double[SIN_COS_TABS_SIZE]; + + // Max abs value for fast modulo, above which we use regular angle normalization. + // This value must be < (Integer.MAX_VALUE / SIN_COS_INDEXER), to stay in range of int type. + // The higher it is, the higher the error, but also the faster it is for lower values. + // If you set it to ((Integer.MAX_VALUE / SIN_COS_INDEXER) * 0.99), worse accuracy on double range is about 1e-10. + static final double SIN_COS_MAX_VALUE_FOR_INT_MODULO = ((Integer.MAX_VALUE>>9) / SIN_COS_INDEXER) * 0.99; + + // Supposed to be >= sin(77.2deg), as fdlibm code is supposed to work with values > 0.975, + // but seems to work well enough as long as value >= sin(25deg). + private static final double ASIN_MAX_VALUE_FOR_TABS = StrictMath.sin(Math.toRadians(73.0)); + + private static final int ASIN_TABS_SIZE = (1<<13) + 1; + private static final double ASIN_DELTA = ASIN_MAX_VALUE_FOR_TABS/(ASIN_TABS_SIZE - 1); + private static final double ASIN_INDEXER = 1/ASIN_DELTA; + private static final double[] asinTab = new double[ASIN_TABS_SIZE]; + private static final double[] asinDer1DivF1Tab = new double[ASIN_TABS_SIZE]; + private static final double[] asinDer2DivF2Tab = new double[ASIN_TABS_SIZE]; + private static final double[] asinDer3DivF3Tab = new double[ASIN_TABS_SIZE]; + private static final double[] asinDer4DivF4Tab = new double[ASIN_TABS_SIZE]; + + private static final double ASIN_PIO2_HI = Double.longBitsToDouble(0x3FF921FB54442D18L); // 1.57079632679489655800e+00 + private static final double ASIN_PIO2_LO = Double.longBitsToDouble(0x3C91A62633145C07L); // 6.12323399573676603587e-17 + private static final double ASIN_PS0 = Double.longBitsToDouble(0x3fc5555555555555L); // 1.66666666666666657415e-01 + private static final double ASIN_PS1 = Double.longBitsToDouble(0xbfd4d61203eb6f7dL); // -3.25565818622400915405e-01 + private static final double ASIN_PS2 = Double.longBitsToDouble(0x3fc9c1550e884455L); // 2.01212532134862925881e-01 + private static final double ASIN_PS3 = Double.longBitsToDouble(0xbfa48228b5688f3bL); // -4.00555345006794114027e-02 + private static final double ASIN_PS4 = Double.longBitsToDouble(0x3f49efe07501b288L); // 7.91534994289814532176e-04 + private static final double ASIN_PS5 = Double.longBitsToDouble(0x3f023de10dfdf709L); // 3.47933107596021167570e-05 + private static final double ASIN_QS1 = Double.longBitsToDouble(0xc0033a271c8a2d4bL); // -2.40339491173441421878e+00 + private static final double ASIN_QS2 = Double.longBitsToDouble(0x40002ae59c598ac8L); // 2.02094576023350569471e+00 + private static final double ASIN_QS3 = Double.longBitsToDouble(0xbfe6066c1b8d0159L); // -6.88283971605453293030e-01 + private static final double ASIN_QS4 = Double.longBitsToDouble(0x3fb3b8c5b12e9282L); // 7.70381505559019352791e-02 + + /** Initializes look-up tables. */ + static { + // sin and cos + final int SIN_COS_PI_INDEX = (SIN_COS_TABS_SIZE-1)/2; + final int SIN_COS_PI_MUL_2_INDEX = 2*SIN_COS_PI_INDEX; + final int SIN_COS_PI_MUL_0_5_INDEX = SIN_COS_PI_INDEX/2; + final int SIN_COS_PI_MUL_1_5_INDEX = 3*SIN_COS_PI_INDEX/2; + for (int i=0;i= -Math.PI/2); + assertTrue(asin(d) <= Math.PI/2); + } + } + + public void testHaversin() { + assertTrue(Double.isNaN(haversin(1, 1, 1, Double.NaN))); + assertTrue(Double.isNaN(haversin(1, 1, Double.NaN, 1))); + assertTrue(Double.isNaN(haversin(1, Double.NaN, 1, 1))); + assertTrue(Double.isNaN(haversin(Double.NaN, 1, 1, 1))); + + assertEquals(0, haversin(0, 0, 0, 0), 0D); + assertEquals(0, haversin(0, -180, 0, -180), 0D); + assertEquals(0, haversin(0, -180, 0, 180), 0D); + assertEquals(0, haversin(0, 180, 0, 180), 0D); + assertEquals(0, haversin(90, 0, 90, 0), 0D); + assertEquals(0, haversin(90, -180, 90, -180), 0D); + assertEquals(0, haversin(90, -180, 90, 180), 0D); + assertEquals(0, haversin(90, 180, 90, 180), 0D); + + // from solr and ES tests (with their respective epsilons) + assertEquals(314.40338, haversin(1, 2, 3, 4), 10e-5); + assertEquals(0, haversin(40.7143528, -74.0059731, 40.7143528, -74.0059731), 0D); + assertEquals(5.286, haversin(40.7143528, -74.0059731, 40.759011, -73.9844722), 0.01D); + assertEquals(0.4621, haversin(40.7143528, -74.0059731, 40.718266, -74.007819), 0.01D); + assertEquals(1.055, haversin(40.7143528, -74.0059731, 40.7051157, -74.0088305), 0.01D); + assertEquals(1.258, haversin(40.7143528, -74.0059731, 40.7247222, -74), 0.01D); + assertEquals(2.029, haversin(40.7143528, -74.0059731, 40.731033, -73.9962255), 0.01D); + assertEquals(8.572, haversin(40.7143528, -74.0059731, 40.65, -73.95), 0.01D); + } +} diff --git a/lucene/expressions/src/java/org/apache/lucene/expressions/js/package.html b/lucene/expressions/src/java/org/apache/lucene/expressions/js/package.html index a8fda2f2ed8..27015902d58 100644 --- a/lucene/expressions/src/java/org/apache/lucene/expressions/js/package.html +++ b/lucene/expressions/src/java/org/apache/lucene/expressions/js/package.html @@ -29,6 +29,7 @@
  • Comparison operators: < <= == >= >
  • Common mathematic functions: abs ceil exp floor ln log2 log10 logn max min sqrt pow
  • Trigonometric library functions: acosh acos asinh asin atanh atan atan2 cosh cos sinh sin tanh tan
  • +
  • Distance functions: haversin
  • Miscellaneous functions: min, max
  • Arbitrary external variables - see {@link org.apache.lucene.expressions.Bindings}
  • diff --git a/lucene/expressions/src/resources/org/apache/lucene/expressions/js/JavascriptCompiler.properties b/lucene/expressions/src/resources/org/apache/lucene/expressions/js/JavascriptCompiler.properties index 893daaee1df..8cecb33bf88 100644 --- a/lucene/expressions/src/resources/org/apache/lucene/expressions/js/JavascriptCompiler.properties +++ b/lucene/expressions/src/resources/org/apache/lucene/expressions/js/JavascriptCompiler.properties @@ -31,6 +31,7 @@ cos = java.lang.Math, cos, 1 cosh = java.lang.Math, cosh, 1 exp = java.lang.Math, exp, 1 floor = java.lang.Math, floor, 1 +haversin = org.apache.lucene.util.SloppyMath, haversin, 4 ln = java.lang.Math, log, 1 log10 = java.lang.Math, log10, 1 logn = org.apache.lucene.util.MathUtil, log, 2 diff --git a/lucene/expressions/src/test/org/apache/lucene/expressions/TestDemoExpressions.java b/lucene/expressions/src/test/org/apache/lucene/expressions/TestDemoExpressions.java index 9567154cdbb..f8002eef82a 100644 --- a/lucene/expressions/src/test/org/apache/lucene/expressions/TestDemoExpressions.java +++ b/lucene/expressions/src/test/org/apache/lucene/expressions/TestDemoExpressions.java @@ -1,6 +1,7 @@ package org.apache.lucene.expressions; import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.expressions.js.JavascriptCompiler; @@ -10,6 +11,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.CheckHits; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; @@ -51,18 +53,24 @@ public class TestDemoExpressions extends LuceneTestCase { doc.add(newStringField("id", "1", Field.Store.YES)); doc.add(newTextField("body", "some contents and more contents", Field.Store.NO)); doc.add(new NumericDocValuesField("popularity", 5)); + doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO)); + doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO)); iw.addDocument(doc); doc = new Document(); doc.add(newStringField("id", "2", Field.Store.YES)); doc.add(newTextField("body", "another document with different contents", Field.Store.NO)); doc.add(new NumericDocValuesField("popularity", 20)); + doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO)); + doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO)); iw.addDocument(doc); doc = new Document(); doc.add(newStringField("id", "3", Field.Store.YES)); doc.add(newTextField("body", "crappy contents", Field.Store.NO)); doc.add(new NumericDocValuesField("popularity", 2)); + doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO)); + doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO)); iw.addDocument(doc); reader = iw.getReader(); @@ -180,4 +188,22 @@ public class TestDemoExpressions extends LuceneTestCase { assertEquals(expected, actual, CheckHits.explainToleranceDelta(expected, actual)); } } + + public void testDistanceSort() throws Exception { + Expression distance = JavascriptCompiler.compile("haversin(40.7143528,-74.0059731,latitude,longitude)"); + SimpleBindings bindings = new SimpleBindings(); + bindings.add(new SortField("latitude", SortField.Type.DOUBLE)); + bindings.add(new SortField("longitude", SortField.Type.DOUBLE)); + Sort sort = new Sort(distance.getSortField(bindings, false)); + TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), null, 3, sort); + + FieldDoc d = (FieldDoc) td.scoreDocs[0]; + assertEquals(0.4621D, (Double)d.fields[0], 1E-4); + + d = (FieldDoc) td.scoreDocs[1]; + assertEquals(1.0550D, (Double)d.fields[0], 1E-4); + + d = (FieldDoc) td.scoreDocs[2]; + assertEquals(5.2859D, (Double)d.fields[0], 1E-4); + } } diff --git a/lucene/expressions/src/test/org/apache/lucene/expressions/js/TestJavascriptFunction.java b/lucene/expressions/src/test/org/apache/lucene/expressions/js/TestJavascriptFunction.java index b867808cacc..e56183c4fa7 100644 --- a/lucene/expressions/src/test/org/apache/lucene/expressions/js/TestJavascriptFunction.java +++ b/lucene/expressions/src/test/org/apache/lucene/expressions/js/TestJavascriptFunction.java @@ -157,6 +157,10 @@ public class TestJavascriptFunction extends LuceneTestCase { assertEvaluatesTo("floor(-1.1)", -2); } + public void testHaversinMethod() throws Exception { + assertEvaluatesTo("haversin(40.7143528,-74.0059731,40.759011,-73.9844722)", 5.285885589128); + } + public void testLnMethod() throws Exception { assertEvaluatesTo("ln(0)", Double.NEGATIVE_INFINITY); assertEvaluatesTo("ln(" + Math.E + ")", 1);