LUCENE-6647: add GeoHash string utility APIs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693700 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2015-08-01 10:08:26 +00:00
parent a7ecfc274a
commit b2ac838de4
5 changed files with 274 additions and 44 deletions

View File

@ -147,6 +147,9 @@ New Features
filtering. Range trees can also handle values larger than 64 bits.
(Adrien Grand, Mike McCandless)
* LUCENE-6647: Add GeoHash string utility APIs (Nick Knize, Mike
McCandless).
API Changes
* LUCENE-6508: Simplify Lock api, there is now just

View File

@ -26,7 +26,8 @@ public final class BitUtil {
private static final long MAGIC[] = {
0x5555555555555555L, 0x3333333333333333L,
0x0F0F0F0F0F0F0F0FL, 0x00FF00FF00FF00FFL,
0x0000FFFF0000FFFFL, 0x00000000FFFFFFFFL
0x0000FFFF0000FFFFL, 0x00000000FFFFFFFFL,
0xAAAAAAAAAAAAAAAAL
};
// shift values for bit interleaving
private static final short SHIFT[] = {1, 2, 4, 8, 16};
@ -144,6 +145,13 @@ public final class BitUtil {
return b;
}
/**
* flip flops odd with even bits
*/
public static final long flipFlop(final long b) {
return ((b & MAGIC[6]) >>> 1) | ((b & MAGIC[0]) << 1 );
}
/** Same as {@link #zigZagEncode(long)} but on integers. */
public static int zigZagEncode(int i) {
return (i >> 31) ^ (i << 1);

View File

@ -0,0 +1,153 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Utilities for converting to/from the GeoHash standard
*
* The geohash long format is represented as lon/lat (x/y) interleaved with the 4 least significant bits
* representing the level (1-12) [xyxy...xyxyllll]
*
* This differs from a morton encoded value which interleaves lat/lon (y/x).
*
* @lucene.experimental
*/
public class GeoHashUtils {
public static final char[] BASE_32 = {'0', '1', '2', '3', '4', '5', '6',
'7', '8', '9', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'm', 'n',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
public static final String BASE_32_STRING = new String(BASE_32);
public static final int PRECISION = 12;
private static final short MORTON_OFFSET = (GeoUtils.BITS<<1) - (PRECISION*5);
/**
* Encode lon/lat to the geohash based long format (lon/lat interleaved, 4 least significant bits = level)
*/
public static final long longEncode(final double lon, final double lat, final int level) {
// shift to appropriate level
final short msf = (short)(((12 - level) * 5) + MORTON_OFFSET);
return ((BitUtil.flipFlop(GeoUtils.mortonHash(lon, lat)) >>> msf) << 4) | level;
}
/**
* Encode from geohash string to the geohash based long format (lon/lat interleaved, 4 least significant bits = level)
*/
public static final long longEncode(final String hash) {
int level = hash.length()-1;
long b;
long l = 0L;
for(char c : hash.toCharArray()) {
b = (long)(BASE_32_STRING.indexOf(c));
l |= (b<<(level--*5));
}
return (l<<4)|hash.length();
}
/**
* Encode to a geohash string from the geohash based long format
*/
public static final String stringEncode(long geoHashLong) {
int level = (int)geoHashLong&15;
geoHashLong >>>= 4;
char[] chars = new char[level];
do {
chars[--level] = BASE_32[(int)(geoHashLong&31L)];
geoHashLong>>>=5;
} while(level > 0);
return new String(chars);
}
/**
* Encode to a geohash string from full resolution longitude, latitude)
*/
public static final String stringEncode(final double lon, final double lat) {
return stringEncode(lon, lat, 12);
}
/**
* Encode to a level specific geohash string from full resolution longitude, latitude
*/
public static final String stringEncode(final double lon, final double lat, final int level) {
// bit twiddle to geohash (since geohash is a swapped (lon/lat) encoding)
final long hashedVal = BitUtil.flipFlop(GeoUtils.mortonHash(lon, lat));
StringBuilder geoHash = new StringBuilder();
short precision = 0;
final short msf = (GeoUtils.BITS<<1)-5;
long mask = 31L<<msf;
do {
geoHash.append(BASE_32[(int)((mask & hashedVal)>>>(msf-(precision*5)))]);
// next 5 bits
mask >>>= 5;
} while (++precision < level);
return geoHash.toString();
}
/**
* Encode to a full precision geohash string from a given morton encoded long value
*/
public static final String stringEncodeFromMortonLong(final long hashedVal) throws Exception {
return stringEncode(hashedVal, PRECISION);
}
/**
* Encode to a geohash string at a given level from a morton long
*/
public static final String stringEncodeFromMortonLong(long hashedVal, final int level) {
// bit twiddle to geohash (since geohash is a swapped (lon/lat) encoding)
hashedVal = BitUtil.flipFlop(hashedVal);
StringBuilder geoHash = new StringBuilder();
short precision = 0;
final short msf = (GeoUtils.BITS<<1)-5;
long mask = 31L<<msf;
do {
geoHash.append(BASE_32[(int)((mask & hashedVal)>>>(msf-(precision*5)))]);
// next 5 bits
mask >>>= 5;
} while (++precision < level);
return geoHash.toString();
}
/**
* Encode to a morton long value from a given geohash string
*/
public static final long mortonEncode(final String hash) {
int level = 11;
long b;
long l = 0L;
for(char c : hash.toCharArray()) {
b = (long)(BASE_32_STRING.indexOf(c));
l |= (b<<((level--*5) + MORTON_OFFSET));
}
return BitUtil.flipFlop(l);
}
/**
* Encode to a morton long value from a given geohash long value
*/
public static final long mortonEncode(final long geoHashLong) {
final int level = (int)(geoHashLong&15);
final short odd = (short)(level & 1);
return BitUtil.flipFlop((geoHashLong >>> 4) << odd) << (((12 - level) * 5) + (MORTON_OFFSET - odd));
}
}

View File

@ -46,6 +46,7 @@ import org.apache.lucene.util.GeoUtils;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.SloppyMath;
import org.apache.lucene.util.TestGeoUtils;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -68,37 +69,10 @@ public class TestGeoPointQuery extends LuceneTestCase {
// determining the possible haversine error
private static final int DISTANCE_ERR = 1000;
// Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
private static double originLat;
private static double originLon;
// private static double range;
private static double lonRange;
private static double latRange;
@BeforeClass
public static void beforeClass() throws Exception {
directory = newDirectory();
// when we randomly test the full lat/lon space it can result in very very slow query times, this is due to the
// number of ranges that can be created in degenerate cases.
// Between 1.0 and 3.0:
// range = 2*(random().nextDouble() + 0.5);
// Between 1.0 and 90.0
//lonRange = 1.0 + (90.0 - 1.0) * random().nextDouble();
//latRange = 1.0 + (45.0 - 1.0) * random().nextDouble();
// Between 1.0 and 3.0:
lonRange = 2*(random().nextDouble() + 0.5);
latRange = 2*(random().nextDouble() + 0.5);
originLon = GeoUtils.MIN_LON_INCL + lonRange + (GeoUtils.MAX_LON_INCL - GeoUtils.MIN_LON_INCL - 2*lonRange) * random().nextDouble();
originLon = GeoUtils.normalizeLon(originLon);
originLat = GeoUtils.MIN_LAT_INCL + latRange + (GeoUtils.MAX_LAT_INCL - GeoUtils.MIN_LAT_INCL - 2*latRange) * random().nextDouble();
originLat = GeoUtils.normalizeLat(originLat);
if (VERBOSE) {
System.out.println("TEST: originLon=" + originLon + " lonRange= " + lonRange + " originLat=" + originLat + " latRange=" + latRange);
}
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
newIndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
@ -306,13 +280,13 @@ public class TestGeoPointQuery extends LuceneTestCase {
if (x == 0) {
// Identical lat to old point
lats[docID] = lats[oldDocID];
lons[docID] = randomLon();
lons[docID] = TestGeoUtils.randomLon();
if (VERBOSE) {
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lat as doc=" + oldDocID + ")");
}
} else if (x == 1) {
// Identical lon to old point
lats[docID] = randomLat();
lats[docID] = TestGeoUtils.randomLat();
lons[docID] = lons[oldDocID];
if (VERBOSE) {
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lon as doc=" + oldDocID + ")");
@ -327,8 +301,8 @@ public class TestGeoPointQuery extends LuceneTestCase {
}
}
} else {
lats[docID] = randomLat();
lons[docID] = randomLon();
lats[docID] = TestGeoUtils.randomLat();
lons[docID] = TestGeoUtils.randomLon();
haveRealDoc = true;
if (VERBOSE) {
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID]);
@ -618,19 +592,11 @@ public class TestGeoPointQuery extends LuceneTestCase {
|| (tMaxLon - tLon) == 0 || (tMaxLat - tLat) == 0);
}
private static double randomLat() {
return GeoUtils.normalizeLat(originLat + latRange * (random().nextDouble() - 0.5));
}
private static double randomLon() {
return GeoUtils.normalizeLon(originLon + lonRange * (random().nextDouble() - 0.5));
}
private static GeoBoundingBox randomBBox() {
double lat0 = randomLat();
double lat1 = randomLat();
double lon0 = randomLon();
double lon1 = randomLon();
double lat0 = TestGeoUtils.randomLat();
double lat1 = TestGeoUtils.randomLat();
double lon0 = TestGeoUtils.randomLon();
double lon1 = TestGeoUtils.randomLon();
if (lat1 < lat0) {
double x = lat0;

View File

@ -0,0 +1,100 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.junit.BeforeClass;
import org.junit.Test;
/**
* Tests class for methods in GeoUtils
*
* @lucene.experimental
*/
public class TestGeoUtils extends LuceneTestCase {
// Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
private static double originLat;
private static double originLon;
// private static double range;
private static double lonRange;
private static double latRange;
@BeforeClass
public static void beforeClass() throws Exception {
// Between 1.0 and 3.0:
lonRange = 2 * (random().nextDouble() + 0.5);
latRange = 2 * (random().nextDouble() + 0.5);
originLon = GeoUtils.MIN_LON_INCL + lonRange + (GeoUtils.MAX_LON_INCL - GeoUtils.MIN_LON_INCL - 2 * lonRange) * random().nextDouble();
originLon = GeoUtils.normalizeLon(originLon);
originLat = GeoUtils.MIN_LAT_INCL + latRange + (GeoUtils.MAX_LAT_INCL - GeoUtils.MIN_LAT_INCL - 2 * latRange) * random().nextDouble();
originLat = GeoUtils.normalizeLat(originLat);
if (VERBOSE) {
System.out.println("TEST: originLon=" + originLon + " lonRange= " + lonRange + " originLat=" + originLat + " latRange=" + latRange);
}
}
@Test
public void testGeoHash() {
int numPoints = atLeast(100);
String randomGeoHashString;
String mortonGeoHash;
long mortonLongFromGHLong, geoHashLong, mortonLongFromGHString;
int randomLevel;
for (int i = 0; i < numPoints; ++i) {
// random point
double lat = randomLatFullRange();
double lon = randomLonFullRange();
// compute geohash straight from lat/lon and from morton encoded value to ensure they're the same
randomGeoHashString = GeoHashUtils.stringEncode(lon, lat, randomLevel = random().nextInt(12 - 1) + 1);
mortonGeoHash = GeoHashUtils.stringEncodeFromMortonLong(GeoUtils.mortonHash(lon, lat), randomLevel);
assertEquals(randomGeoHashString, mortonGeoHash);
// v&v conversion from lat/lon or geohashstring to geohash long and back to geohash string
geoHashLong = (random().nextBoolean()) ? GeoHashUtils.longEncode(lon, lat, randomLevel) : GeoHashUtils.longEncode(randomGeoHashString);
assertEquals(randomGeoHashString, GeoHashUtils.stringEncode(geoHashLong));
// v&v conversion from geohash long to morton long
mortonLongFromGHString = GeoHashUtils.mortonEncode(randomGeoHashString);
mortonLongFromGHLong = GeoHashUtils.mortonEncode(geoHashLong);
assertEquals(mortonLongFromGHLong, mortonLongFromGHString);
// v&v lat/lon from geohash string and geohash long
assertEquals(GeoUtils.mortonUnhashLat(mortonLongFromGHString), GeoUtils.mortonUnhashLat(mortonLongFromGHLong), 0);
assertEquals(GeoUtils.mortonUnhashLon(mortonLongFromGHString), GeoUtils.mortonUnhashLon(mortonLongFromGHLong), 0);
}
}
public static double randomLatFullRange() {
return (180d * random().nextDouble()) - 90d;
}
public static double randomLonFullRange() {
return (360d * random().nextDouble()) - 180d;
}
public static double randomLat() {
return GeoUtils.normalizeLat(originLat + latRange * (random().nextDouble() - 0.5));
}
public static double randomLon() {
return GeoUtils.normalizeLon(originLon + lonRange * (random().nextDouble() - 0.5));
}
}