LUCENE-6647: add GeoHash string utility APIs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693700 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2015-08-01 10:08:26 +00:00
parent a7ecfc274a
commit b2ac838de4
5 changed files with 274 additions and 44 deletions

CHANGES.txt

@@ -147,6 +147,9 @@ New Features
   filtering. Range trees can also handle values larger than 64 bits.
   (Adrien Grand, Mike McCandless)
 
+* LUCENE-6647: Add GeoHash string utility APIs (Nick Knize, Mike
+  McCandless).
+
 API Changes
 
 * LUCENE-6508: Simplify Lock api, there is now just

BitUtil.java

@@ -26,7 +26,8 @@ public final class BitUtil {
   private static final long MAGIC[] = {
       0x5555555555555555L, 0x3333333333333333L,
       0x0F0F0F0F0F0F0F0FL, 0x00FF00FF00FF00FFL,
-      0x0000FFFF0000FFFFL, 0x00000000FFFFFFFFL
+      0x0000FFFF0000FFFFL, 0x00000000FFFFFFFFL,
+      0xAAAAAAAAAAAAAAAAL
   };
   // shift values for bit interleaving
   private static final short SHIFT[] = {1, 2, 4, 8, 16};

@@ -144,6 +145,13 @@ public final class BitUtil {
     return b;
   }
 
+  /**
+   * flip flops odd with even bits
+   */
+  public static final long flipFlop(final long b) {
+    return ((b & MAGIC[6]) >>> 1) | ((b & MAGIC[0]) << 1);
+  }
+
   /** Same as {@link #zigZagEncode(long)} but on integers. */
   public static int zigZagEncode(int i) {
     return (i >> 31) ^ (i << 1);
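
The flipFlop helper added above swaps every even bit with its odd neighbor, using the existing 0x5555... mask (MAGIC[0]) and the newly added 0xAAAA... mask (MAGIC[6]); this is what lets the GeoHashUtils class below convert between GeoUtils' lat/lon-interleaved morton hash and the lon/lat-interleaved geohash bit order. A minimal standalone sketch of the same bit trick (the class and constant names here are ours, with the two masks inlined for illustration; they are not part of the commit):

public class FlipFlopDemo {
  // Inlined copies of the two masks BitUtil uses (MAGIC[0] and the new MAGIC[6]).
  private static final long EVEN_BITS = 0x5555555555555555L; // bits 0, 2, 4, ...
  private static final long ODD_BITS  = 0xAAAAAAAAAAAAAAAAL; // bits 1, 3, 5, ...

  // Same expression as BitUtil.flipFlop: exchange each bit with its neighbor.
  static long flipFlop(long b) {
    return ((b & ODD_BITS) >>> 1) | ((b & EVEN_BITS) << 1);
  }

  public static void main(String[] args) {
    long v = 0b1100_0110L;
    System.out.println(Long.toBinaryString(flipFlop(v))); // prints 11001001
    System.out.println(flipFlop(flipFlop(v)) == v);       // true: applying it twice is a no-op
  }
}

Because the swap is an involution, the same call converts in both directions: from morton (lat/lon) order to geohash (lon/lat) order and back.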

GeoHashUtils.java (new file)

@@ -0,0 +1,153 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Utilities for converting to/from the GeoHash standard
*
* The geohash long format is represented as lon/lat (x/y) interleaved with the 4 least significant bits
* representing the level (1-12) [xyxy...xyxyllll]
*
* This differs from a morton encoded value which interleaves lat/lon (y/x).
*
* @lucene.experimental
*/
public class GeoHashUtils {
  public static final char[] BASE_32 = {'0', '1', '2', '3', '4', '5', '6',
      '7', '8', '9', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'm', 'n',
      'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};

  public static final String BASE_32_STRING = new String(BASE_32);

  public static final int PRECISION = 12;
  private static final short MORTON_OFFSET = (GeoUtils.BITS<<1) - (PRECISION*5);

  /**
   * Encode lon/lat to the geohash based long format (lon/lat interleaved, 4 least significant bits = level)
   */
  public static final long longEncode(final double lon, final double lat, final int level) {
    // shift to appropriate level
    final short msf = (short)(((12 - level) * 5) + MORTON_OFFSET);
    return ((BitUtil.flipFlop(GeoUtils.mortonHash(lon, lat)) >>> msf) << 4) | level;
  }

  /**
   * Encode from geohash string to the geohash based long format (lon/lat interleaved, 4 least significant bits = level)
   */
  public static final long longEncode(final String hash) {
    int level = hash.length()-1;
    long b;
    long l = 0L;
    for(char c : hash.toCharArray()) {
      b = (long)(BASE_32_STRING.indexOf(c));
      l |= (b<<(level--*5));
    }
    return (l<<4)|hash.length();
  }

  /**
   * Encode to a geohash string from the geohash based long format
   */
  public static final String stringEncode(long geoHashLong) {
    int level = (int)geoHashLong&15;
    geoHashLong >>>= 4;
    char[] chars = new char[level];
    do {
      chars[--level] = BASE_32[(int)(geoHashLong&31L)];
      geoHashLong>>>=5;
    } while(level > 0);
    return new String(chars);
  }

  /**
   * Encode to a full precision geohash string from full resolution longitude, latitude
   */
  public static final String stringEncode(final double lon, final double lat) {
    return stringEncode(lon, lat, 12);
  }

  /**
   * Encode to a level specific geohash string from full resolution longitude, latitude
   */
  public static final String stringEncode(final double lon, final double lat, final int level) {
    // bit twiddle to geohash (since geohash is a swapped (lon/lat) encoding)
    final long hashedVal = BitUtil.flipFlop(GeoUtils.mortonHash(lon, lat));

    StringBuilder geoHash = new StringBuilder();
    short precision = 0;
    final short msf = (GeoUtils.BITS<<1)-5;
    long mask = 31L<<msf;
    do {
      geoHash.append(BASE_32[(int)((mask & hashedVal)>>>(msf-(precision*5)))]);
      // next 5 bits
      mask >>>= 5;
    } while (++precision < level);
    return geoHash.toString();
  }

  /**
   * Encode to a full precision geohash string from a given morton encoded long value
   */
  public static final String stringEncodeFromMortonLong(final long hashedVal) throws Exception {
    return stringEncode(hashedVal, PRECISION);
  }

  /**
   * Encode to a geohash string at a given level from a morton long
   */
  public static final String stringEncodeFromMortonLong(long hashedVal, final int level) {
    // bit twiddle to geohash (since geohash is a swapped (lon/lat) encoding)
    hashedVal = BitUtil.flipFlop(hashedVal);

    StringBuilder geoHash = new StringBuilder();
    short precision = 0;
    final short msf = (GeoUtils.BITS<<1)-5;
    long mask = 31L<<msf;
    do {
      geoHash.append(BASE_32[(int)((mask & hashedVal)>>>(msf-(precision*5)))]);
      // next 5 bits
      mask >>>= 5;
    } while (++precision < level);
    return geoHash.toString();
  }

  /**
   * Encode to a morton long value from a given geohash string
   */
  public static final long mortonEncode(final String hash) {
    int level = 11;
    long b;
    long l = 0L;
    for(char c : hash.toCharArray()) {
      b = (long)(BASE_32_STRING.indexOf(c));
      l |= (b<<((level--*5) + MORTON_OFFSET));
    }
    return BitUtil.flipFlop(l);
  }

  /**
   * Encode to a morton long value from a given geohash long value
   */
  public static final long mortonEncode(final long geoHashLong) {
    final int level = (int)(geoHashLong&15);
    final short odd = (short)(level & 1);

    return BitUtil.flipFlop((geoHashLong >>> 4) << odd) << (((12 - level) * 5) + (MORTON_OFFSET - odd));
  }
}
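
The new class ties together three representations of the same cell: the geohash string, the geohash long described in the class javadoc (lon/lat interleaved bits with the level kept in the 4 least significant bits), and GeoUtils' lat/lon-interleaved morton long. A short usage sketch follows; the demo class is ours, and the coordinates are the well-known "ezs42" geohash example point rather than anything from the commit, so the expected outputs are illustrative:

import org.apache.lucene.util.GeoHashUtils;
import org.apache.lucene.util.GeoUtils;

public class GeoHashUtilsDemo {
  public static void main(String[] args) {
    // Classic geohash example point: (lat 42.6, lon -5.6) falls in cell "ezs42" at level 5.
    double lon = -5.6, lat = 42.6;

    // Geohash string straight from the coordinates.
    String hash = GeoHashUtils.stringEncode(lon, lat, 5);
    System.out.println(hash); // expected: ezs42

    // Compact long form: same interleaved bits, with the level stored in the low 4 bits.
    long ghLong = GeoHashUtils.longEncode(lon, lat, 5);
    System.out.println(ghLong & 15L);                      // 5, the level
    System.out.println(GeoHashUtils.stringEncode(ghLong)); // ezs42 again

    // Morton long for use with GeoUtils; this decodes to a representative point of the
    // ezs42 cell, not the exact input, since bits below the chosen level were truncated.
    long morton = GeoHashUtils.mortonEncode(hash);
    System.out.println(GeoUtils.mortonUnhashLat(morton) + "," + GeoUtils.mortonUnhashLon(morton));
  }
}

The TestGeoUtils.testGeoHash case added below exercises exactly these round trips on random points and levels.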

TestGeoPointQuery.java

@@ -46,6 +46,7 @@ import org.apache.lucene.util.GeoUtils;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.SloppyMath;
+import org.apache.lucene.util.TestGeoUtils;
 import org.apache.lucene.util.TestUtil;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -68,37 +69,10 @@ public class TestGeoPointQuery extends LuceneTestCase {
   // determining the possible haversine error
   private static final int DISTANCE_ERR = 1000;
 
-  // Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
-  private static double originLat;
-  private static double originLon;
-  // private static double range;
-  private static double lonRange;
-  private static double latRange;
-
   @BeforeClass
   public static void beforeClass() throws Exception {
     directory = newDirectory();
 
-    // when we randomly test the full lat/lon space it can result in very very slow query times, this is due to the
-    // number of ranges that can be created in degenerate cases.
-    // Between 1.0 and 3.0:
-    // range = 2*(random().nextDouble() + 0.5);
-    // Between 1.0 and 90.0
-    //lonRange = 1.0 + (90.0 - 1.0) * random().nextDouble();
-    //latRange = 1.0 + (45.0 - 1.0) * random().nextDouble();
-    // Between 1.0 and 3.0:
-    lonRange = 2*(random().nextDouble() + 0.5);
-    latRange = 2*(random().nextDouble() + 0.5);
-    originLon = GeoUtils.MIN_LON_INCL + lonRange + (GeoUtils.MAX_LON_INCL - GeoUtils.MIN_LON_INCL - 2*lonRange) * random().nextDouble();
-    originLon = GeoUtils.normalizeLon(originLon);
-    originLat = GeoUtils.MIN_LAT_INCL + latRange + (GeoUtils.MAX_LAT_INCL - GeoUtils.MIN_LAT_INCL - 2*latRange) * random().nextDouble();
-    originLat = GeoUtils.normalizeLat(originLat);
-    if (VERBOSE) {
-      System.out.println("TEST: originLon=" + originLon + " lonRange= " + lonRange + " originLat=" + originLat + " latRange=" + latRange);
-    }
-
     RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
         newIndexWriterConfig(new MockAnalyzer(random()))
             .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
@@ -306,13 +280,13 @@ public class TestGeoPointQuery extends LuceneTestCase {
       if (x == 0) {
         // Identical lat to old point
         lats[docID] = lats[oldDocID];
-        lons[docID] = randomLon();
+        lons[docID] = TestGeoUtils.randomLon();
         if (VERBOSE) {
           //System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lat as doc=" + oldDocID + ")");
         }
       } else if (x == 1) {
         // Identical lon to old point
-        lats[docID] = randomLat();
+        lats[docID] = TestGeoUtils.randomLat();
         lons[docID] = lons[oldDocID];
         if (VERBOSE) {
           //System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID] + " (same lon as doc=" + oldDocID + ")");
@@ -327,8 +301,8 @@ public class TestGeoPointQuery extends LuceneTestCase {
           }
         }
       } else {
-        lats[docID] = randomLat();
-        lons[docID] = randomLon();
+        lats[docID] = TestGeoUtils.randomLat();
+        lons[docID] = TestGeoUtils.randomLon();
         haveRealDoc = true;
         if (VERBOSE) {
           //System.out.println(" doc=" + docID + " lat=" + lats[docID] + " lon=" + lons[docID]);
@@ -618,19 +592,11 @@ public class TestGeoPointQuery extends LuceneTestCase {
         || (tMaxLon - tLon) == 0 || (tMaxLat - tLat) == 0);
   }
 
-  private static double randomLat() {
-    return GeoUtils.normalizeLat(originLat + latRange * (random().nextDouble() - 0.5));
-  }
-
-  private static double randomLon() {
-    return GeoUtils.normalizeLon(originLon + lonRange * (random().nextDouble() - 0.5));
-  }
-
   private static GeoBoundingBox randomBBox() {
-    double lat0 = randomLat();
-    double lat1 = randomLat();
-    double lon0 = randomLon();
-    double lon1 = randomLon();
+    double lat0 = TestGeoUtils.randomLat();
+    double lat1 = TestGeoUtils.randomLat();
+    double lon0 = TestGeoUtils.randomLon();
+    double lon1 = TestGeoUtils.randomLon();
     if (lat1 < lat0) {
       double x = lat0;

TestGeoUtils.java (new file)

@@ -0,0 +1,100 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.junit.BeforeClass;
import org.junit.Test;
/**
* Test class for methods in GeoUtils
*
* @lucene.experimental
*/
public class TestGeoUtils extends LuceneTestCase {

  // Global bounding box we will "cover" in the random test; we have to make this "smallish" else the queries take very long:
  private static double originLat;
  private static double originLon;
  // private static double range;
  private static double lonRange;
  private static double latRange;

  @BeforeClass
  public static void beforeClass() throws Exception {
    // Between 1.0 and 3.0:
    lonRange = 2 * (random().nextDouble() + 0.5);
    latRange = 2 * (random().nextDouble() + 0.5);

    originLon = GeoUtils.MIN_LON_INCL + lonRange + (GeoUtils.MAX_LON_INCL - GeoUtils.MIN_LON_INCL - 2 * lonRange) * random().nextDouble();
    originLon = GeoUtils.normalizeLon(originLon);
    originLat = GeoUtils.MIN_LAT_INCL + latRange + (GeoUtils.MAX_LAT_INCL - GeoUtils.MIN_LAT_INCL - 2 * latRange) * random().nextDouble();
    originLat = GeoUtils.normalizeLat(originLat);

    if (VERBOSE) {
      System.out.println("TEST: originLon=" + originLon + " lonRange= " + lonRange + " originLat=" + originLat + " latRange=" + latRange);
    }
  }

  @Test
  public void testGeoHash() {
    int numPoints = atLeast(100);
    String randomGeoHashString;
    String mortonGeoHash;
    long mortonLongFromGHLong, geoHashLong, mortonLongFromGHString;
    int randomLevel;

    for (int i = 0; i < numPoints; ++i) {
      // random point
      double lat = randomLatFullRange();
      double lon = randomLonFullRange();

      // compute geohash straight from lat/lon and from morton encoded value to ensure they're the same
      randomGeoHashString = GeoHashUtils.stringEncode(lon, lat, randomLevel = random().nextInt(12 - 1) + 1);
      mortonGeoHash = GeoHashUtils.stringEncodeFromMortonLong(GeoUtils.mortonHash(lon, lat), randomLevel);
      assertEquals(randomGeoHashString, mortonGeoHash);

      // v&v conversion from lat/lon or geohashstring to geohash long and back to geohash string
      geoHashLong = (random().nextBoolean()) ? GeoHashUtils.longEncode(lon, lat, randomLevel) : GeoHashUtils.longEncode(randomGeoHashString);
      assertEquals(randomGeoHashString, GeoHashUtils.stringEncode(geoHashLong));

      // v&v conversion from geohash long to morton long
      mortonLongFromGHString = GeoHashUtils.mortonEncode(randomGeoHashString);
      mortonLongFromGHLong = GeoHashUtils.mortonEncode(geoHashLong);
      assertEquals(mortonLongFromGHLong, mortonLongFromGHString);

      // v&v lat/lon from geohash string and geohash long
      assertEquals(GeoUtils.mortonUnhashLat(mortonLongFromGHString), GeoUtils.mortonUnhashLat(mortonLongFromGHLong), 0);
      assertEquals(GeoUtils.mortonUnhashLon(mortonLongFromGHString), GeoUtils.mortonUnhashLon(mortonLongFromGHLong), 0);
    }
  }

  public static double randomLatFullRange() {
    return (180d * random().nextDouble()) - 90d;
  }

  public static double randomLonFullRange() {
    return (360d * random().nextDouble()) - 180d;
  }

  public static double randomLat() {
    return GeoUtils.normalizeLat(originLat + latRange * (random().nextDouble() - 0.5));
  }

  public static double randomLon() {
    return GeoUtils.normalizeLon(originLon + lonRange * (random().nextDouble() - 0.5));
  }
}