mirror of https://github.com/apache/lucene.git
LUCENE-6647: add GeoHash string utility APIs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693700 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a7ecfc274a
commit
b2ac838de4
|
@ -147,6 +147,9 @@ New Features
|
|||
filtering. Range trees can also handle values larger than 64 bits.
|
||||
(Adrien Grand, Mike McCandless)
|
||||
|
||||
* LUCENE-6647: Add GeoHash string utility APIs (Nick Knize, Mike
|
||||
McCandless).
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-6508: Simplify Lock api, there is now just
|
||||
|
|
|
@ -26,7 +26,8 @@ public final class BitUtil {
|
|||
private static final long MAGIC[] = {
|
||||
0x5555555555555555L, 0x3333333333333333L,
|
||||
0x0F0F0F0F0F0F0F0FL, 0x00FF00FF00FF00FFL,
|
||||
0x0000FFFF0000FFFFL, 0x00000000FFFFFFFFL
|
||||
0x0000FFFF0000FFFFL, 0x00000000FFFFFFFFL,
|
||||
0xAAAAAAAAAAAAAAAAL
|
||||
};
|
||||
// shift values for bit interleaving
|
||||
private static final short SHIFT[] = {1, 2, 4, 8, 16};
|
||||
|
@ -144,6 +145,13 @@ public final class BitUtil {
|
|||
return b;
|
||||
}
|
||||
|
||||
/**
|
||||
* flip flops odd with even bits
|
||||
*/
|
||||
public static final long flipFlop(final long b) {
|
||||
return ((b & MAGIC[6]) >>> 1) | ((b & MAGIC[0]) << 1 );
|
||||
}
|
||||
|
||||
/** Same as {@link #zigZagEncode(long)} but on integers. */
|
||||
public static int zigZagEncode(int i) {
|
||||
return (i >> 31) ^ (i << 1);
|
||||
|
|
|
@ -0,0 +1,153 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Utilities for converting to/from the GeoHash standard
|
||||
*
|
||||
* The geohash long format is represented as lon/lat (x/y) interleaved with the 4 least significant bits
|
||||
* representing the level (1-12) [xyxy...xyxyllll]
|
||||
*
|
||||
* This differs from a morton encoded value which interleaves lat/lon (y/x).
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class GeoHashUtils {
|
||||
public static final char[] BASE_32 = {'0', '1', '2', '3', '4', '5', '6',
|
||||
'7', '8', '9', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'm', 'n',
|
||||
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
|
||||
|
||||
public static final String BASE_32_STRING = new String(BASE_32);
|
||||
|
||||
public static final int PRECISION = 12;
|
||||
private static final short MORTON_OFFSET = (GeoUtils.BITS<<1) - (PRECISION*5);
|
||||
|
||||
/**
|
||||
* Encode lon/lat to the geohash based long format (lon/lat interleaved, 4 least significant bits = level)
|
||||
*/
|
||||
public static final long longEncode(final double lon, final double lat, final int level) {
|
||||
// shift to appropriate level
|
||||
final short msf = (short)(((12 - level) * 5) + MORTON_OFFSET);
|
||||
return ((BitUtil.flipFlop(GeoUtils.mortonHash(lon, lat)) >>> msf) << 4) | level;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode from geohash string to the geohash based long format (lon/lat interleaved, 4 least significant bits = level)
|
||||
*/
|
||||
public static final long longEncode(final String hash) {
|
||||
int level = hash.length()-1;
|
||||
long b;
|
||||
long l = 0L;
|
||||
for(char c : hash.toCharArray()) {
|
||||
b = (long)(BASE_32_STRING.indexOf(c));
|
||||
l |= (b<<(level--*5));
|
||||
}
|
||||
return (l<<4)|hash.length();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode to a geohash string from the geohash based long format
|
||||
*/
|
||||
public static final String stringEncode(long geoHashLong) {
|
||||
int level = (int)geoHashLong&15;
|
||||
geoHashLong >>>= 4;
|
||||
char[] chars = new char[level];
|
||||
do {
|
||||
chars[--level] = BASE_32[(int)(geoHashLong&31L)];
|
||||
geoHashLong>>>=5;
|
||||
} while(level > 0);
|
||||
|
||||
return new String(chars);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode to a geohash string from full resolution longitude, latitude)
|
||||
*/
|
||||
public static final String stringEncode(final double lon, final double lat) {
|
||||
return stringEncode(lon, lat, 12);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode to a level specific geohash string from full resolution longitude, latitude
|
||||
*/
|
||||
public static final String stringEncode(final double lon, final double lat, final int level) {
|
||||
// bit twiddle to geohash (since geohash is a swapped (lon/lat) encoding)
|
||||
final long hashedVal = BitUtil.flipFlop(GeoUtils.mortonHash(lon, lat));
|
||||
|
||||
StringBuilder geoHash = new StringBuilder();
|
||||
short precision = 0;
|
||||
final short msf = (GeoUtils.BITS<<1)-5;
|
||||
long mask = 31L<<msf;
|
||||
do {
|
||||
geoHash.append(BASE_32[(int)((mask & hashedVal)>>>(msf-(precision*5)))]);
|
||||
// next 5 bits
|
||||
mask >>>= 5;
|
||||
} while (++precision < level);
|
||||
return geoHash.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode to a full precision geohash string from a given morton encoded long value
|
||||
*/
|
||||
public static final String stringEncodeFromMortonLong(final long hashedVal) throws Exception {
|
||||
return stringEncode(hashedVal, PRECISION);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode to a geohash string at a given level from a morton long
|
||||
*/
|
||||
public static final String stringEncodeFromMortonLong(long hashedVal, final int level) {
|
||||
// bit twiddle to geohash (since geohash is a swapped (lon/lat) encoding)
|
||||
hashedVal = BitUtil.flipFlop(hashedVal);
|
||||
|
||||
StringBuilder geoHash = new StringBuilder();
|
||||
short precision = 0;
|
||||
final short msf = (GeoUtils.BITS<<1)-5;
|
||||
long mask = 31L<<msf;
|
||||
do {
|
||||
geoHash.append(BASE_32[(int)((mask & hashedVal)>>>(msf-(precision*5)))]);
|
||||
// next 5 bits
|
||||
mask >>>= 5;
|
||||
} while (++precision < level);
|
||||
return geoHash.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode to a morton long value from a given geohash string
|
||||
*/
|
||||
public static final long mortonEncode(final String hash) {
|
||||
int level = 11;
|
||||
long b;
|
||||
long l = 0L;
|
||||
for(char c : hash.toCharArray()) {
|
||||
b = (long)(BASE_32_STRING.indexOf(c));
|
||||
l |= (b<<((level--*5) + MORTON_OFFSET));
|
||||
}
|
||||
return BitUtil.flipFlop(l);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode to a morton long value from a given geohash long value
|
||||
*/
|
||||
public static final long mortonEncode(final long geoHashLong) {
|
||||
final int level = (int)(geoHashLong&15);
|
||||
final short odd = (short)(level & 1);
|
||||
|
||||
return BitUtil.flipFlop((geoHashLong >>> 4) << odd) << (((12 - level) * 5) + (MORTON_OFFSET - odd));
|
||||
}
|
||||
}
|
|
@ -46,6 +46,7 @@ import org.apache.lucene.util.GeoUtils;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.SloppyMath;
|
||||
import org.apache.lucene.util.TestGeoUtils;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -68,37 +69,10 @@ public class TestGeoPointQuery extends LuceneTestCase {
|
|||
// determining the possible haversine error
|
||||
private static final int DISTANCE_ERR = 1000;
|
||||
|
||||
// Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
|
||||
private static double originLat;
|
||||
private static double originLon;
|
||||
// private static double range;
|
||||
private static double lonRange;
|
||||
private static double latRange;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
directory = newDirectory();
|
||||
|
||||
// when we randomly test the full lat/lon space it can result in very very slow query times, this is due to the
|
||||
// number of ranges that can be created in degenerate cases.
|
||||
|
||||
// Between 1.0 and 3.0:
|
||||
// range = 2*(random().nextDouble() + 0.5);
|
||||
// Between 1.0 and 90.0
|
||||
//lonRange = 1.0 + (90.0 - 1.0) * random().nextDouble();
|
||||
//latRange = 1.0 + (45.0 - 1.0) * random().nextDouble();
|
||||
|
||||
// Between 1.0 and 3.0:
|
||||
lonRange = 2*(random().nextDouble() + 0.5);
|
||||
latRange = 2*(random().nextDouble() + 0.5);
|
||||
|
||||
originLon = GeoUtils.MIN_LON_INCL + lonRange + (GeoUtils.MAX_LON_INCL - GeoUtils.MIN_LON_INCL - 2*lonRange) * random().nextDouble();
|
||||
originLon = GeoUtils.normalizeLon(originLon);
|
||||
originLat = GeoUtils.MIN_LAT_INCL + latRange + (GeoUtils.MAX_LAT_INCL - GeoUtils.MIN_LAT_INCL - 2*latRange) * random().nextDouble();
|
||||
originLat = GeoUtils.normalizeLat(originLat);
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: originLon=" + originLon + " lonRange= " + lonRange + " originLat=" + originLat + " latRange=" + latRange);
|
||||
}
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
|
||||
newIndexWriterConfig(new MockAnalyzer(random()))
|
||||
.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
|
||||
|
@ -306,13 +280,13 @@ public class TestGeoPointQuery extends LuceneTestCase {
|
|||
if (x == 0) {
|
||||
// Identical lat to old point
|
||||
lats[docID] = lats[oldDocID];
|
||||
lons[docID] = randomLon();
|
||||
lons[docID] = TestGeoUtils.randomLon();
|
||||
if (VERBOSE) {
|
||||
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lat as doc=" + oldDocID + ")");
|
||||
}
|
||||
} else if (x == 1) {
|
||||
// Identical lon to old point
|
||||
lats[docID] = randomLat();
|
||||
lats[docID] = TestGeoUtils.randomLat();
|
||||
lons[docID] = lons[oldDocID];
|
||||
if (VERBOSE) {
|
||||
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lon as doc=" + oldDocID + ")");
|
||||
|
@ -327,8 +301,8 @@ public class TestGeoPointQuery extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
lats[docID] = randomLat();
|
||||
lons[docID] = randomLon();
|
||||
lats[docID] = TestGeoUtils.randomLat();
|
||||
lons[docID] = TestGeoUtils.randomLon();
|
||||
haveRealDoc = true;
|
||||
if (VERBOSE) {
|
||||
//System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID]);
|
||||
|
@ -618,19 +592,11 @@ public class TestGeoPointQuery extends LuceneTestCase {
|
|||
|| (tMaxLon - tLon) == 0 || (tMaxLat - tLat) == 0);
|
||||
}
|
||||
|
||||
private static double randomLat() {
|
||||
return GeoUtils.normalizeLat(originLat + latRange * (random().nextDouble() - 0.5));
|
||||
}
|
||||
|
||||
private static double randomLon() {
|
||||
return GeoUtils.normalizeLon(originLon + lonRange * (random().nextDouble() - 0.5));
|
||||
}
|
||||
|
||||
private static GeoBoundingBox randomBBox() {
|
||||
double lat0 = randomLat();
|
||||
double lat1 = randomLat();
|
||||
double lon0 = randomLon();
|
||||
double lon1 = randomLon();
|
||||
double lat0 = TestGeoUtils.randomLat();
|
||||
double lat1 = TestGeoUtils.randomLat();
|
||||
double lon0 = TestGeoUtils.randomLon();
|
||||
double lon1 = TestGeoUtils.randomLon();
|
||||
|
||||
if (lat1 < lat0) {
|
||||
double x = lat0;
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Tests class for methods in GeoUtils
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class TestGeoUtils extends LuceneTestCase {
|
||||
|
||||
// Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
|
||||
private static double originLat;
|
||||
private static double originLon;
|
||||
// private static double range;
|
||||
private static double lonRange;
|
||||
private static double latRange;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
// Between 1.0 and 3.0:
|
||||
lonRange = 2 * (random().nextDouble() + 0.5);
|
||||
latRange = 2 * (random().nextDouble() + 0.5);
|
||||
|
||||
originLon = GeoUtils.MIN_LON_INCL + lonRange + (GeoUtils.MAX_LON_INCL - GeoUtils.MIN_LON_INCL - 2 * lonRange) * random().nextDouble();
|
||||
originLon = GeoUtils.normalizeLon(originLon);
|
||||
originLat = GeoUtils.MIN_LAT_INCL + latRange + (GeoUtils.MAX_LAT_INCL - GeoUtils.MIN_LAT_INCL - 2 * latRange) * random().nextDouble();
|
||||
originLat = GeoUtils.normalizeLat(originLat);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: originLon=" + originLon + " lonRange= " + lonRange + " originLat=" + originLat + " latRange=" + latRange);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGeoHash() {
|
||||
int numPoints = atLeast(100);
|
||||
String randomGeoHashString;
|
||||
String mortonGeoHash;
|
||||
long mortonLongFromGHLong, geoHashLong, mortonLongFromGHString;
|
||||
int randomLevel;
|
||||
for (int i = 0; i < numPoints; ++i) {
|
||||
// random point
|
||||
double lat = randomLatFullRange();
|
||||
double lon = randomLonFullRange();
|
||||
|
||||
// compute geohash straight from lat/lon and from morton encoded value to ensure they're the same
|
||||
randomGeoHashString = GeoHashUtils.stringEncode(lon, lat, randomLevel = random().nextInt(12 - 1) + 1);
|
||||
mortonGeoHash = GeoHashUtils.stringEncodeFromMortonLong(GeoUtils.mortonHash(lon, lat), randomLevel);
|
||||
assertEquals(randomGeoHashString, mortonGeoHash);
|
||||
|
||||
// v&v conversion from lat/lon or geohashstring to geohash long and back to geohash string
|
||||
geoHashLong = (random().nextBoolean()) ? GeoHashUtils.longEncode(lon, lat, randomLevel) : GeoHashUtils.longEncode(randomGeoHashString);
|
||||
assertEquals(randomGeoHashString, GeoHashUtils.stringEncode(geoHashLong));
|
||||
|
||||
// v&v conversion from geohash long to morton long
|
||||
mortonLongFromGHString = GeoHashUtils.mortonEncode(randomGeoHashString);
|
||||
mortonLongFromGHLong = GeoHashUtils.mortonEncode(geoHashLong);
|
||||
assertEquals(mortonLongFromGHLong, mortonLongFromGHString);
|
||||
|
||||
// v&v lat/lon from geohash string and geohash long
|
||||
assertEquals(GeoUtils.mortonUnhashLat(mortonLongFromGHString), GeoUtils.mortonUnhashLat(mortonLongFromGHLong), 0);
|
||||
assertEquals(GeoUtils.mortonUnhashLon(mortonLongFromGHString), GeoUtils.mortonUnhashLon(mortonLongFromGHLong), 0);
|
||||
}
|
||||
}
|
||||
|
||||
public static double randomLatFullRange() {
|
||||
return (180d * random().nextDouble()) - 90d;
|
||||
}
|
||||
|
||||
public static double randomLonFullRange() {
|
||||
return (360d * random().nextDouble()) - 180d;
|
||||
}
|
||||
|
||||
public static double randomLat() {
|
||||
return GeoUtils.normalizeLat(originLat + latRange * (random().nextDouble() - 0.5));
|
||||
}
|
||||
|
||||
public static double randomLon() {
|
||||
return GeoUtils.normalizeLon(originLon + lonRange * (random().nextDouble() - 0.5));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue