Fix overflow error in parsing of long geohashes (#29418)

Fixes a possible overflow error that geohashes longer than 12 characters
can cause during parsing.

Fixes #24616
This commit is contained in:
Igor Motov 2018-04-16 12:37:38 -04:00 committed by GitHub
parent 34ec403a2e
commit e334baf6fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 40 additions and 5 deletions

View File

@ -92,6 +92,16 @@ format was changed early on to conform to the format used by GeoJSON.
==================================================
[NOTE]
A point can be expressed as a http://en.wikipedia.org/wiki/Geohash[geohash].
Geohashes are https://en.wikipedia.org/wiki/Base32[base32] encoded strings of
the bits of the latitude and longitude interleaved. Each character in a geohash
adds additional 5 bits to the precision. So the longer the hash, the more
precise it is. For the indexing purposed geohashs are translated into
latitude-longitude pairs. During this process only first 12 characters are
used, so specifying more than 12 characters in a geohash doesn't increase the
precision. The 12 characters provide 60 bits, which should reduce a possible
error to less than 2cm.
[[geo-point-params]]
==== Parameters for `geo_point` fields

View File

@ -72,15 +72,19 @@ public class GeoHashUtils {
/**
* Encode from geohash string to the geohash based long format (lon/lat interleaved, 4 least significant bits = level)
*/
public static final long longEncode(final String hash) {
int level = hash.length()-1;
private static long longEncode(final String hash, int length) {
int level = length - 1;
long b;
long l = 0L;
for(char c : hash.toCharArray()) {
b = (long)(BASE_32_STRING.indexOf(c));
l |= (b<<(level--*5));
if (level < 0) {
// We cannot handle more than 12 levels
break;
}
}
return (l<<4)|hash.length();
return (l << 4) | length;
}
/**
@ -173,6 +177,10 @@ public class GeoHashUtils {
for(char c : hash.toCharArray()) {
b = (long)(BASE_32_STRING.indexOf(c));
l |= (b<<((level--*5) + MORTON_OFFSET));
if (level < 0) {
// We cannot handle more than 12 levels
break;
}
}
return BitUtil.flipFlop(l);
}
@ -200,13 +208,14 @@ public class GeoHashUtils {
public static Rectangle bbox(final String geohash) {
// bottom left is the coordinate
GeoPoint bottomLeft = GeoPoint.fromGeohash(geohash);
long ghLong = longEncode(geohash);
int len = Math.min(12, geohash.length());
long ghLong = longEncode(geohash, len);
// shift away the level
ghLong >>>= 4;
// deinterleave and add 1 to lat and lon to get topRight
long lat = BitUtil.deinterleave(ghLong >>> 1) + 1;
long lon = BitUtil.deinterleave(ghLong) + 1;
GeoPoint topRight = GeoPoint.fromGeohash(BitUtil.interleave((int)lon, (int)lat) << 4 | geohash.length());
GeoPoint topRight = GeoPoint.fromGeohash(BitUtil.interleave((int)lon, (int)lat) << 4 | len);
return new Rectangle(bottomLeft.lat(), topRight.lat(), bottomLeft.lon(), topRight.lon());
}

View File

@ -82,4 +82,20 @@ public class GeoHashTests extends ESTestCase {
assertEquals("xbpbpbpbpbpb", GeoHashUtils.stringEncode(180, 0));
assertEquals("zzzzzzzzzzzz", GeoHashUtils.stringEncode(180, 90));
}
public void testLongGeohashes() {
for (int i = 0; i < 100000; i++) {
String geohash = randomGeohash(12, 12);
GeoPoint expected = GeoPoint.fromGeohash(geohash);
// Adding some random geohash characters at the end
String extendedGeohash = geohash + randomGeohash(1, 10);
GeoPoint actual = GeoPoint.fromGeohash(extendedGeohash);
assertEquals("Additional data points above 12 should be ignored [" + extendedGeohash + "]" , expected, actual);
Rectangle expectedBbox = GeoHashUtils.bbox(geohash);
Rectangle actualBbox = GeoHashUtils.bbox(extendedGeohash);
assertEquals("Additional data points above 12 should be ignored [" + extendedGeohash + "]" , expectedBbox, actualBbox);
}
}
}