From 20de7fd95a66da9562bda6b1310e2fcbc464ddac Mon Sep 17 00:00:00 2001 From: Pranav Date: Mon, 1 Apr 2024 02:28:03 -0700 Subject: [PATCH] Geo spatial interfaces (#16029) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR creates an interface for ImmutableRTree and moves the existing implementation to a new class that represents the 32-bit implementation (stores coordinates as floats). This makes ImmutableRTree extendable so that a higher-precision (64-bit) implementation can be created as well. In all spatial bound filters we accept floats as input, which might not be accurate in the case of a high-precision implementation of ImmutableRTree. This PR changes the bound filters to accept the query bounds as double instead of float. This is a backward-compatible change, as it compares doubles to the existing float values in the RTree. Previously it compared input floats to RTree floats, which can cause precision loss; now it is a little better, as it compares double to float, which is still not 100% accurate. There are no changes in the way that we query a spatial dimension today, except for input bound parsing. There is a small improvement in the string filter predicate, which now parses double strings instead of float strings and compares double to double, which is 100% accurate, but the string predicate is only called when we don't have a spatial index. By allowing the interface to extend ImmutableRTree, we make it possible to create a high-precision (HP) implementation and to define new search strategies that perform HP search: Iterable<ImmutableBitmap> search(ImmutableDoubleNode node, Bound bound); With possible HP implementations, the radius bound filter cannot really focus on accuracy, because it compares using Euclidean distance. As EARTH 🌍 is round and not flat, Euclidean distances are not accurate in a geo system. This PR adds a new parameter called 'radiusUnit', which allows you to specify units such as meters, km, miles, etc. 
It uses the Haversine formula (https://en.wikipedia.org/wiki/Haversine_formula) to check whether a given geo point falls inside the circle. Added a test in RadiusBoundTest that generates sets of points inside and outside the circle. --- docs/querying/geo.md | 5 +- .../spatial/BaseImmutableRTee.java | 28 +++ .../spatial/ImmutableFloatNode.java | 230 ++++++++++++++++++ ...blePoint.java => ImmutableFloatPoint.java} | 8 +- .../collections/spatial/ImmutableNode.java | 193 +-------------- .../collections/spatial/ImmutableRTree.java | 5 +- .../druid/collections/spatial/Node.java | 2 +- .../druid/collections/spatial/RTreeUtils.java | 32 +++ .../collections/spatial/search/Bound.java | 9 +- .../spatial/search/GutmanSearchStrategy.java | 22 +- .../spatial/search/PolygonBound.java | 8 +- .../spatial/search/RadiusBound.java | 88 +++++-- .../spatial/search/RectangularBound.java | 12 +- .../spatial/search/SearchStrategy.java | 4 +- .../DictionaryEncodedColumnMerger.java | 2 +- .../segment/index/semantic/SpatialIndex.java | 4 +- .../spatial/ImmutableRTreeTest.java | 28 +++ .../collections/spatial/SpatialUtils.java | 60 +++++ .../spatial/search/PolygonBoundTest.java | 18 ++ .../spatial/search/RadiusBoundTest.java | 72 ++++++ .../spatial/search/RectangularBoundTest.java | 43 ++++ website/.spelling | 3 + 22 files changed, 640 insertions(+), 236 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/collections/spatial/BaseImmutableRTee.java create mode 100644 processing/src/main/java/org/apache/druid/collections/spatial/ImmutableFloatNode.java rename processing/src/main/java/org/apache/druid/collections/spatial/{ImmutablePoint.java => ImmutableFloatPoint.java} (88%) create mode 100644 processing/src/test/java/org/apache/druid/collections/spatial/SpatialUtils.java diff --git a/docs/querying/geo.md b/docs/querying/geo.md index 065f13eb81e..56c4645896f 100644 --- a/docs/querying/geo.md +++ b/docs/querying/geo.md @@ -142,8 +142,9 @@ The `radius` bound has the following elements: 
|Property|Description|Required| |--------|-----------|--------| -|`coords`|Origin coordinates in the form [x, y]|yes| -|`radius`|The float radius value|yes| +|`coords`|Center coordinates in the form [x, y]|yes| +|`radius`|The float radius value, expressed in the unit given by `radiusUnit`|yes| +|`radiusUnit`|Lowercase string naming the unit of `radius`. Allowed values are `euclidean`, `meters`, `miles`, and `kilometers`; the default is `euclidean`.|no| #### Polygon diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/BaseImmutableRTee.java b/processing/src/main/java/org/apache/druid/collections/spatial/BaseImmutableRTee.java new file mode 100644 index 00000000000..f342c226168 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/collections/spatial/BaseImmutableRTee.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.collections.spatial; + +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.collections.spatial.search.Bound; + +public interface BaseImmutableRTee +{ + Iterable search(Bound bound); +} diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableFloatNode.java b/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableFloatNode.java new file mode 100644 index 00000000000..70e0f7c9f05 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableFloatNode.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.collections.spatial; + +import org.apache.druid.collections.bitmap.BitmapFactory; +import org.apache.druid.collections.bitmap.ImmutableBitmap; + +import java.nio.ByteBuffer; +import java.util.Iterator; + +/** + * Byte layout: + * Header + * 0 to 1 : the MSB is a boolean flag for isLeaf, the next 15 bits represent the number of children of a node + * Body + * 2 to 2 + numDims * Float.BYTES : minCoordinates + * 2 + numDims * Float.BYTES to 2 + 2 * numDims * Float.BYTES : maxCoordinates + * concise set + * rest (children) : Every 4 bytes is storing an offset representing the position of a child. + * + * The child offset is an offset from the initialOffset + */ +public class ImmutableFloatNode implements ImmutableNode +{ + public static final int HEADER_NUM_BYTES = 2; + + private final int numDims; + private final int initialOffset; + private final int offsetFromInitial; + + private final short numChildren; + private final boolean isLeaf; + private final int childrenOffset; + + private final ByteBuffer data; + + private final BitmapFactory bitmapFactory; + + public ImmutableFloatNode( + int numDims, + int initialOffset, + int offsetFromInitial, + ByteBuffer data, + BitmapFactory bitmapFactory + ) + { + this.bitmapFactory = bitmapFactory; + this.numDims = numDims; + this.initialOffset = initialOffset; + this.offsetFromInitial = offsetFromInitial; + short header = data.getShort(initialOffset + offsetFromInitial); + this.isLeaf = (header & 0x8000) != 0; + this.numChildren = (short) (header & 0x7FFF); + final int sizePosition = initialOffset + offsetFromInitial + HEADER_NUM_BYTES + 2 * numDims * Float.BYTES; + int bitmapSize = data.getInt(sizePosition); + this.childrenOffset = initialOffset + + offsetFromInitial + + HEADER_NUM_BYTES + + 2 * numDims * Float.BYTES + + Integer.BYTES + + bitmapSize; + + this.data = data; + } + + public ImmutableFloatNode( + int numDims, + int initialOffset, + int offsetFromInitial, + short numChildren, + 
boolean leaf, + ByteBuffer data, + BitmapFactory bitmapFactory + ) + { + this.bitmapFactory = bitmapFactory; + this.numDims = numDims; + this.initialOffset = initialOffset; + this.offsetFromInitial = offsetFromInitial; + this.numChildren = numChildren; + this.isLeaf = leaf; + final int sizePosition = initialOffset + offsetFromInitial + HEADER_NUM_BYTES + 2 * numDims * Float.BYTES; + int bitmapSize = data.getInt(sizePosition); + this.childrenOffset = initialOffset + + offsetFromInitial + + HEADER_NUM_BYTES + + 2 * numDims * Float.BYTES + + Integer.BYTES + + bitmapSize; + + this.data = data; + } + + @Override + public BitmapFactory getBitmapFactory() + { + return bitmapFactory; + } + + @Override + public int getInitialOffset() + { + return initialOffset; + } + + @Override + public int getOffsetFromInitial() + { + return offsetFromInitial; + } + + @Override + public int getNumDims() + { + return numDims; + } + + @Override + public boolean isLeaf() + { + return isLeaf; + } + + @Override + public float[] getMinCoordinates() + { + return getCoords(initialOffset + offsetFromInitial + HEADER_NUM_BYTES); + } + + @Override + public float[] getMaxCoordinates() + { + return getCoords(initialOffset + offsetFromInitial + HEADER_NUM_BYTES + numDims * Float.BYTES); + } + + @Override + public ImmutableBitmap getImmutableBitmap() + { + final int sizePosition = initialOffset + offsetFromInitial + HEADER_NUM_BYTES + 2 * numDims * Float.BYTES; + int numBytes = data.getInt(sizePosition); + data.position(sizePosition + Integer.BYTES); + ByteBuffer tmpBuffer = data.slice(); + tmpBuffer.limit(numBytes); + return bitmapFactory.mapImmutableBitmap(tmpBuffer.asReadOnlyBuffer()); + } + + @Override + @SuppressWarnings("ArgumentParameterSwap") + public Iterable> getChildren() + { + return new Iterable>() + { + @Override + public Iterator> iterator() + { + return new Iterator>() + { + private int count = 0; + + @Override + public boolean hasNext() + { + return (count < numChildren); + } + + 
@Override + public ImmutableNode next() + { + if (isLeaf) { + return new ImmutableFloatPoint( + numDims, + initialOffset, + data.getInt(childrenOffset + (count++) * Integer.BYTES), + data, + bitmapFactory + ); + } + return new ImmutableFloatNode( + numDims, + initialOffset, + data.getInt(childrenOffset + (count++) * Integer.BYTES), + data, + bitmapFactory + ); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + + @Override + public ByteBuffer getData() + { + return data; + } + + private float[] getCoords(int offset) + { + final float[] retVal = new float[numDims]; + + final ByteBuffer readOnlyBuffer = data.asReadOnlyBuffer(); + readOnlyBuffer.position(offset); + readOnlyBuffer.asFloatBuffer().get(retVal); + + return retVal; + } +} diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/ImmutablePoint.java b/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableFloatPoint.java similarity index 88% rename from processing/src/main/java/org/apache/druid/collections/spatial/ImmutablePoint.java rename to processing/src/main/java/org/apache/druid/collections/spatial/ImmutableFloatPoint.java index e8eb9ab9e1a..5fc629703d1 100644 --- a/processing/src/main/java/org/apache/druid/collections/spatial/ImmutablePoint.java +++ b/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableFloatPoint.java @@ -23,9 +23,9 @@ import org.apache.druid.collections.bitmap.BitmapFactory; import java.nio.ByteBuffer; -public class ImmutablePoint extends ImmutableNode +public class ImmutableFloatPoint extends ImmutableFloatNode { - public ImmutablePoint( + public ImmutableFloatPoint( int numDims, int initialOffset, int offsetFromInitial, @@ -36,7 +36,7 @@ public class ImmutablePoint extends ImmutableNode super(numDims, initialOffset, offsetFromInitial, (short) 0, true, data, bitmapFactory); } - public ImmutablePoint(ImmutableNode node) + public ImmutableFloatPoint(ImmutableNode node) { 
super( node.getNumDims(), @@ -55,7 +55,7 @@ public class ImmutablePoint extends ImmutableNode } @Override - public Iterable getChildren() + public Iterable> getChildren() { // should never get here throw new UnsupportedOperationException(); diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableNode.java b/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableNode.java index dae71985ebd..31d83512fa7 100644 --- a/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableNode.java +++ b/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableNode.java @@ -23,198 +23,27 @@ import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.ImmutableBitmap; import java.nio.ByteBuffer; -import java.util.Iterator; -/** - * Byte layout: - * Header - * 0 to 1 : the MSB is a boolean flag for isLeaf, the next 15 bits represent the number of children of a node - * Body - * 2 to 2 + numDims * Float.BYTES : minCoordinates - * 2 + numDims * Float.BYTES to 2 + 2 * numDims * Float.BYTES : maxCoordinates - * concise set - * rest (children) : Every 4 bytes is storing an offset representing the position of a child. 
- * - * The child offset is an offset from the initialOffset - */ -public class ImmutableNode +public interface ImmutableNode { - public static final int HEADER_NUM_BYTES = 2; + BitmapFactory getBitmapFactory(); - private final int numDims; - private final int initialOffset; - private final int offsetFromInitial; + int getInitialOffset(); - private final short numChildren; - private final boolean isLeaf; - private final int childrenOffset; + int getOffsetFromInitial(); - private final ByteBuffer data; + int getNumDims(); - private final BitmapFactory bitmapFactory; + boolean isLeaf(); - public ImmutableNode( - int numDims, - int initialOffset, - int offsetFromInitial, - ByteBuffer data, - BitmapFactory bitmapFactory - ) - { - this.bitmapFactory = bitmapFactory; - this.numDims = numDims; - this.initialOffset = initialOffset; - this.offsetFromInitial = offsetFromInitial; - short header = data.getShort(initialOffset + offsetFromInitial); - this.isLeaf = (header & 0x8000) != 0; - this.numChildren = (short) (header & 0x7FFF); - final int sizePosition = initialOffset + offsetFromInitial + HEADER_NUM_BYTES + 2 * numDims * Float.BYTES; - int bitmapSize = data.getInt(sizePosition); - this.childrenOffset = initialOffset - + offsetFromInitial - + HEADER_NUM_BYTES - + 2 * numDims * Float.BYTES - + Integer.BYTES - + bitmapSize; + TCoordinatesArray getMinCoordinates(); - this.data = data; - } + TCoordinatesArray getMaxCoordinates(); - public ImmutableNode( - int numDims, - int initialOffset, - int offsetFromInitial, - short numChildren, - boolean leaf, - ByteBuffer data, - BitmapFactory bitmapFactory - ) - { - this.bitmapFactory = bitmapFactory; - this.numDims = numDims; - this.initialOffset = initialOffset; - this.offsetFromInitial = offsetFromInitial; - this.numChildren = numChildren; - this.isLeaf = leaf; - final int sizePosition = initialOffset + offsetFromInitial + HEADER_NUM_BYTES + 2 * numDims * Float.BYTES; - int bitmapSize = data.getInt(sizePosition); - 
this.childrenOffset = initialOffset - + offsetFromInitial - + HEADER_NUM_BYTES - + 2 * numDims * Float.BYTES - + Integer.BYTES - + bitmapSize; - - this.data = data; - } - - public BitmapFactory getBitmapFactory() - { - return bitmapFactory; - } - - public int getInitialOffset() - { - return initialOffset; - } - - public int getOffsetFromInitial() - { - return offsetFromInitial; - } - - public int getNumDims() - { - return numDims; - } - - public boolean isLeaf() - { - return isLeaf; - } - - public float[] getMinCoordinates() - { - return getCoords(initialOffset + offsetFromInitial + HEADER_NUM_BYTES); - } - - public float[] getMaxCoordinates() - { - return getCoords(initialOffset + offsetFromInitial + HEADER_NUM_BYTES + numDims * Float.BYTES); - } - - public ImmutableBitmap getImmutableBitmap() - { - final int sizePosition = initialOffset + offsetFromInitial + HEADER_NUM_BYTES + 2 * numDims * Float.BYTES; - int numBytes = data.getInt(sizePosition); - data.position(sizePosition + Integer.BYTES); - ByteBuffer tmpBuffer = data.slice(); - tmpBuffer.limit(numBytes); - return bitmapFactory.mapImmutableBitmap(tmpBuffer.asReadOnlyBuffer()); - } + ImmutableBitmap getImmutableBitmap(); @SuppressWarnings("ArgumentParameterSwap") - public Iterable getChildren() - { - return new Iterable() - { - @Override - public Iterator iterator() - { - return new Iterator() - { - private int count = 0; + Iterable> getChildren(); - @Override - public boolean hasNext() - { - return (count < numChildren); - } - - @Override - public ImmutableNode next() - { - if (isLeaf) { - return new ImmutablePoint( - numDims, - initialOffset, - data.getInt(childrenOffset + (count++) * Integer.BYTES), - data, - bitmapFactory - ); - } - return new ImmutableNode( - numDims, - initialOffset, - data.getInt(childrenOffset + (count++) * Integer.BYTES), - data, - bitmapFactory - ); - } - - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } - }; - } - }; - } - - public ByteBuffer 
getData() - { - return data; - } - - private float[] getCoords(int offset) - { - final float[] retVal = new float[numDims]; - - final ByteBuffer readOnlyBuffer = data.asReadOnlyBuffer(); - readOnlyBuffer.position(offset); - readOnlyBuffer.asFloatBuffer().get(retVal); - - return retVal; - } + ByteBuffer getData(); } diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableRTree.java b/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableRTree.java index 31955cfd9d0..8d13d0b9ef3 100644 --- a/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableRTree.java +++ b/processing/src/main/java/org/apache/druid/collections/spatial/ImmutableRTree.java @@ -36,7 +36,7 @@ import java.nio.ByteBuffer; /** * An immutable representation of an {@link RTree} for spatial indexing. */ -public final class ImmutableRTree implements Comparable +public final class ImmutableRTree implements Comparable, BaseImmutableRTee { private static final byte VERSION = 0x0; @@ -65,7 +65,7 @@ public final class ImmutableRTree implements Comparable Preconditions.checkArgument(data.get(initPosition) == VERSION, "Mismatching versions"); this.numDims = data.getInt(1 + initPosition) & 0x7FFF; this.data = data; - this.root = new ImmutableNode(numDims, initPosition, 1 + Integer.BYTES, data, bitmapFactory); + this.root = new ImmutableFloatNode(numDims, initPosition, 1 + Integer.BYTES, data, bitmapFactory); } public static ImmutableRTree newImmutableFromMutable(RTree rTree) @@ -116,6 +116,7 @@ public final class ImmutableRTree implements Comparable return data.remaining(); } + @Override public Iterable search(Bound bound) { return search(defaultSearchStrategy, bound); diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/Node.java b/processing/src/main/java/org/apache/druid/collections/spatial/Node.java index 96f476bfb23..16dfdd43226 100644 --- a/processing/src/main/java/org/apache/druid/collections/spatial/Node.java +++ 
b/processing/src/main/java/org/apache/druid/collections/spatial/Node.java @@ -193,7 +193,7 @@ public class Node public int getSizeInBytes() { - return ImmutableNode.HEADER_NUM_BYTES + return ImmutableFloatNode.HEADER_NUM_BYTES + 2 * getNumDims() * Float.BYTES + Integer.BYTES // size of the set + bitmap.getSizeInBytes() diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/RTreeUtils.java b/processing/src/main/java/org/apache/druid/collections/spatial/RTreeUtils.java index ca8bc6b464e..cfed4a210e7 100644 --- a/processing/src/main/java/org/apache/druid/collections/spatial/RTreeUtils.java +++ b/processing/src/main/java/org/apache/druid/collections/spatial/RTreeUtils.java @@ -71,4 +71,36 @@ public class RTreeUtils } } + /** + * Returns the distance in meters between two geo coordinates (latitude/longitude in degrees), computed with the Haversine formula: https://en.wikipedia.org/wiki/Haversine_formula + */ + public static double calculateHaversineDistance( + final double lat1, + final double lon1, + final double lat2, + final double lon2 + ) + { + // Convert degrees to radians + double radLat1 = Math.toRadians(lat1); + double radLon1 = Math.toRadians(lon1); + double radLat2 = Math.toRadians(lat2); + double radLon2 = Math.toRadians(lon2); + + // Haversine formula + double dLat = radLat2 - radLat1; + double dLon = radLon2 - radLon1; + + double a = Math.sin(dLat / 2) * Math.sin(dLat / 2) + + Math.cos(radLat1) * Math.cos(radLat2) * + Math.sin(dLon / 2) * Math.sin(dLon / 2); + + double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); + + // Radius of Earth in meters (6371 km; use 6371.0 here for a result in kilometers) + double radius = 6371000.0; + + return radius * c; + } + } diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/search/Bound.java b/processing/src/main/java/org/apache/druid/collections/spatial/search/Bound.java index 4ba0a386347..2d4385a1be2 100644 --- a/processing/src/main/java/org/apache/druid/collections/spatial/search/Bound.java +++ 
b/processing/src/main/java/org/apache/druid/collections/spatial/search/Bound.java @@ -23,7 +23,6 @@ import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; import org.apache.druid.annotations.SubclassesMustOverrideEqualsAndHashCode; import org.apache.druid.collections.spatial.ImmutableNode; -import org.apache.druid.collections.spatial.ImmutablePoint; /** */ @@ -34,17 +33,17 @@ import org.apache.druid.collections.spatial.ImmutablePoint; @JsonSubTypes.Type(name = "polygon", value = PolygonBound.class) }) @SubclassesMustOverrideEqualsAndHashCode -public interface Bound +public interface Bound> { int getLimit(); int getNumDims(); - boolean overlaps(ImmutableNode node); + boolean overlaps(ImmutableNode node); - boolean contains(float[] coords); + boolean contains(TCoordinateArray coords); - Iterable filter(Iterable points); + Iterable filter(Iterable points); byte[] getCacheKey(); } diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/search/GutmanSearchStrategy.java b/processing/src/main/java/org/apache/druid/collections/spatial/search/GutmanSearchStrategy.java index de7d56edcd5..7070a29c66d 100644 --- a/processing/src/main/java/org/apache/druid/collections/spatial/search/GutmanSearchStrategy.java +++ b/processing/src/main/java/org/apache/druid/collections/spatial/search/GutmanSearchStrategy.java @@ -23,15 +23,15 @@ import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.collect.Iterables; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.collections.spatial.ImmutableFloatPoint; import org.apache.druid.collections.spatial.ImmutableNode; -import org.apache.druid.collections.spatial.ImmutablePoint; /** */ -public class GutmanSearchStrategy implements SearchStrategy +public class GutmanSearchStrategy> implements SearchStrategy { @Override - public Iterable search(ImmutableNode node, Bound bound) + public Iterable 
search(ImmutableNode node, Bound bound) { if (bound.getLimit() > 0) { return Iterables.transform( @@ -49,10 +49,10 @@ public class GutmanSearchStrategy implements SearchStrategy return Iterables.transform( depthFirstSearch(node, bound), - new Function() + new Function() { @Override - public ImmutableBitmap apply(ImmutablePoint immutablePoint) + public ImmutableBitmap apply(ImmutableFloatPoint immutablePoint) { return immutablePoint.getImmutableBitmap(); } @@ -60,18 +60,18 @@ public class GutmanSearchStrategy implements SearchStrategy ); } - public Iterable depthFirstSearch(ImmutableNode node, final Bound bound) + public Iterable depthFirstSearch(ImmutableNode node, final Bound bound) { if (node.isLeaf()) { return bound.filter( Iterables.transform( node.getChildren(), - new Function() + new Function() { @Override - public ImmutablePoint apply(ImmutableNode tNode) + public ImmutableFloatPoint apply(ImmutableNode tNode) { - return new ImmutablePoint(tNode); + return new ImmutableFloatPoint(tNode); } } ) @@ -90,10 +90,10 @@ public class GutmanSearchStrategy implements SearchStrategy } } ), - new Function>() + new Function>() { @Override - public Iterable apply(ImmutableNode child) + public Iterable apply(ImmutableNode child) { return depthFirstSearch(child, bound); } diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/search/PolygonBound.java b/processing/src/main/java/org/apache/druid/collections/spatial/search/PolygonBound.java index d3b0b47f6e4..68dc2174a89 100644 --- a/processing/src/main/java/org/apache/druid/collections/spatial/search/PolygonBound.java +++ b/processing/src/main/java/org/apache/druid/collections/spatial/search/PolygonBound.java @@ -24,7 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.Iterables; -import org.apache.druid.collections.spatial.ImmutablePoint; +import 
org.apache.druid.collections.spatial.ImmutableFloatPoint; import java.nio.ByteBuffer; import java.util.Arrays; @@ -161,14 +161,14 @@ public class PolygonBound extends RectangularBound } @Override - public Iterable filter(Iterable points) + public Iterable filter(Iterable points) { return Iterables.filter( points, - new Predicate() + new Predicate() { @Override - public boolean apply(ImmutablePoint immutablePoint) + public boolean apply(ImmutableFloatPoint immutablePoint) { return contains(immutablePoint.getCoords()); } diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/search/RadiusBound.java b/processing/src/main/java/org/apache/druid/collections/spatial/search/RadiusBound.java index 87f9bccd152..0a817ce5849 100644 --- a/processing/src/main/java/org/apache/druid/collections/spatial/search/RadiusBound.java +++ b/processing/src/main/java/org/apache/druid/collections/spatial/search/RadiusBound.java @@ -23,29 +23,53 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Predicate; import com.google.common.collect.Iterables; -import org.apache.druid.collections.spatial.ImmutablePoint; +import org.apache.druid.collections.spatial.ImmutableFloatPoint; +import org.apache.druid.collections.spatial.RTreeUtils; +import javax.annotation.Nullable; import java.nio.ByteBuffer; /** + * */ public class RadiusBound extends RectangularBound { private static final byte CACHE_TYPE_ID = 0x01; private final float[] coords; private final float radius; + private final RadiusUnit radiusUnit; @JsonCreator public RadiusBound( @JsonProperty("coords") float[] coords, @JsonProperty("radius") float radius, - @JsonProperty("limit") int limit + @JsonProperty("limit") int limit, + @JsonProperty("radiusUnit") @Nullable RadiusUnit radiusUnit ) { super(getMinCoords(coords, radius), getMaxCoords(coords, radius), limit); this.coords = coords; this.radius = radius; + this.radiusUnit = radiusUnit 
== null ? RadiusUnit.euclidean : radiusUnit; + } + + public RadiusBound( + float[] coords, + float radius, + int limit + ) + { + this(coords, radius, limit, null); + } + + public RadiusBound( + float[] coords, + float radius, + RadiusUnit radiusUnit + ) + { + this(coords, radius, 0, radiusUnit); } public RadiusBound( @@ -53,7 +77,7 @@ public class RadiusBound extends RectangularBound float radius ) { - this(coords, radius, 0); + this(coords, radius, 0, null); } private static float[] getMinCoords(float[] coords, float radius) @@ -86,26 +110,40 @@ public class RadiusBound extends RectangularBound return radius; } - @Override - public boolean contains(float[] otherCoords) + @JsonProperty + public RadiusUnit getRadiusUnit() { - double total = 0.0; - for (int i = 0; i < coords.length; i++) { - total += Math.pow(otherCoords[i] - coords[i], 2); - } - - return (total <= Math.pow(radius, 2)); + return radiusUnit; } @Override - public Iterable filter(Iterable points) + public boolean contains(float[] otherCoords) + { + if (otherCoords.length < 2 || coords.length < 2) { + return false; + } + if (radiusUnit == RadiusUnit.euclidean) { + double total = 0.0; + for (int i = 0; i < coords.length; i++) { + total += Math.pow(otherCoords[i] - coords[i], 2); + } + return (total <= Math.pow(radius, 2)); + } else { + double radiusInMeters = getRadius() * radiusUnit.getMetersMultiFactor(); + double distance = RTreeUtils.calculateHaversineDistance(coords[0], coords[1], otherCoords[0], otherCoords[1]); + return distance <= radiusInMeters; + } + } + + @Override + public Iterable filter(Iterable points) { return Iterables.filter( points, - new Predicate() + new Predicate() { @Override - public boolean apply(ImmutablePoint point) + public boolean apply(ImmutableFloatPoint point) { return contains(point.getCoords()); } @@ -127,4 +165,26 @@ public class RadiusBound extends RectangularBound .put(CACHE_TYPE_ID); return cacheKey.array(); } + + public enum RadiusUnit + { + meters(1), + 
euclidean(1), + @SuppressWarnings("unused") // will be used in high precision filtering + miles(1609.344f), + @SuppressWarnings("unused") + kilometers(1000); + + float metersMultiFactor; + + RadiusUnit(float mmf) + { + this.metersMultiFactor = mmf; + } + + public float getMetersMultiFactor() + { + return metersMultiFactor; + } + } } diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/search/RectangularBound.java b/processing/src/main/java/org/apache/druid/collections/spatial/search/RectangularBound.java index 79f2f688d60..621b66a638c 100644 --- a/processing/src/main/java/org/apache/druid/collections/spatial/search/RectangularBound.java +++ b/processing/src/main/java/org/apache/druid/collections/spatial/search/RectangularBound.java @@ -24,8 +24,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.Iterables; +import org.apache.druid.collections.spatial.ImmutableFloatPoint; import org.apache.druid.collections.spatial.ImmutableNode; -import org.apache.druid.collections.spatial.ImmutablePoint; import java.nio.ByteBuffer; import java.util.Arrays; @@ -33,7 +33,7 @@ import java.util.Objects; /** */ -public class RectangularBound implements Bound +public class RectangularBound implements Bound { private static final byte CACHE_TYPE_ID = 0x0; @@ -92,7 +92,7 @@ public class RectangularBound implements Bound } @Override - public boolean overlaps(ImmutableNode node) + public boolean overlaps(ImmutableNode node) { final float[] nodeMinCoords = node.getMinCoordinates(); final float[] nodeMaxCoords = node.getMaxCoordinates(); @@ -119,14 +119,14 @@ public class RectangularBound implements Bound } @Override - public Iterable filter(Iterable points) + public Iterable filter(Iterable points) { return Iterables.filter( points, - new Predicate() + new Predicate() { @Override - public boolean apply(ImmutablePoint immutablePoint) + public 
boolean apply(ImmutableFloatPoint immutablePoint) { return contains(immutablePoint.getCoords()); } diff --git a/processing/src/main/java/org/apache/druid/collections/spatial/search/SearchStrategy.java b/processing/src/main/java/org/apache/druid/collections/spatial/search/SearchStrategy.java index 712c56bc1bc..1eca158044f 100644 --- a/processing/src/main/java/org/apache/druid/collections/spatial/search/SearchStrategy.java +++ b/processing/src/main/java/org/apache/druid/collections/spatial/search/SearchStrategy.java @@ -25,7 +25,7 @@ import org.apache.druid.collections.spatial.ImmutableNode; /** */ -public interface SearchStrategy +public interface SearchStrategy> { - Iterable search(ImmutableNode node, Bound bound); + Iterable search(ImmutableNode node, Bound bound); } diff --git a/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java b/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java index f1d9d7c5bb4..b11498137fe 100644 --- a/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java +++ b/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java @@ -685,7 +685,7 @@ public abstract class DictionaryEncodedColumnMerger> imp * {@link DictionaryEncodedColumnMerger#writeIndexes(List)} is called, on top of the standard bitmap index created * with {@link DictionaryEncodedColumnMerger#mergeBitmaps} */ - interface ExtendedIndexesMerger + protected interface ExtendedIndexesMerger { void initialize() throws IOException; diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/SpatialIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/SpatialIndex.java index cf19d5c4d9a..a949a70ed84 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/SpatialIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/SpatialIndex.java @@ -19,11 +19,11 @@ package 
org.apache.druid.segment.index.semantic; -import org.apache.druid.collections.spatial.ImmutableRTree; +import org.apache.druid.collections.spatial.BaseImmutableRTee; /** */ public interface SpatialIndex { - ImmutableRTree getRTree(); + BaseImmutableRTee getRTree(); } diff --git a/processing/src/test/java/org/apache/druid/collections/spatial/ImmutableRTreeTest.java b/processing/src/test/java/org/apache/druid/collections/spatial/ImmutableRTreeTest.java index 4f28ee62642..94edd78275f 100644 --- a/processing/src/test/java/org/apache/druid/collections/spatial/ImmutableRTreeTest.java +++ b/processing/src/test/java/org/apache/druid/collections/spatial/ImmutableRTreeTest.java @@ -20,6 +20,7 @@ package org.apache.druid.collections.spatial; import com.google.common.base.Stopwatch; +import com.google.common.collect.FluentIterable; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; import com.google.common.primitives.Bytes; @@ -674,4 +675,31 @@ public class ImmutableRTreeTest byte[] bytes2 = deserializedTree.toBytes(); org.junit.Assert.assertEquals(Bytes.asList(bytes1), Bytes.asList(bytes2)); } + + @Test + public void testPreciseRadiusBoundFilter() + { + BitmapFactory bf = new RoaringBitmapFactory(); + RTree tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bf), bf); + float centerLat = 37.4133961f; + float centerLong = -122.1224665f; + float[][] insidePoints = SpatialUtils.generateGeoCoordinatesAroundCircle(centerLat, centerLong, 100, 100, true); + for (int i = 0; i < insidePoints.length; i++) { + tree.insert(insidePoints[i], i); + } + float[][] outsidePoints = SpatialUtils.generateGeoCoordinatesAroundCircle(centerLat, centerLong, 100, 100, false); + for (int i = 0; i < outsidePoints.length; i++) { + tree.insert(outsidePoints[i], i); + } + ImmutableRTree searchTree = ImmutableRTree.newImmutableFromMutable(tree); + Iterable points = searchTree.search(new RadiusBound( + new float[]{centerLat, centerLong}, + 100, + 2, + 
RadiusBound.RadiusUnit.meters + )); + org.junit.Assert.assertTrue(((FluentIterable) points).toList().size() == 100); + ImmutableBitmap finalSet = bf.union(points); + Assert.assertTrue(finalSet.size() == 100); + } } diff --git a/processing/src/test/java/org/apache/druid/collections/spatial/SpatialUtils.java b/processing/src/test/java/org/apache/druid/collections/spatial/SpatialUtils.java new file mode 100644 index 00000000000..25a425af14e --- /dev/null +++ b/processing/src/test/java/org/apache/druid/collections/spatial/SpatialUtils.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.druid.collections.spatial;
+
+import java.util.Random;
+import java.util.concurrent.ThreadLocalRandom;
+
+/** Test helper that generates random [latitude, longitude] points around a circle with a radius in meters. */
+public class SpatialUtils
+{
+  /**
+   * Returns {@code numberOfPoints} random [latitude, longitude] pairs around the given center. When
+   * {@code shouldBeInside} is true each point is offset by {@code circleRadius * sqrt(u) - 1} meters,
+   * otherwise by between {@code circleRadius} and {@code circleRadius + 100} meters (u uniform in [0, 1)).
+   */
+  public static float[][] generateGeoCoordinatesAroundCircle(
+      float circleCenterLat,
+      float circleCenterLon,
+      float circleRadius,
+      int numberOfPoints,
+      boolean shouldBeInside
+  )
+  {
+    // ThreadLocalRandom must be obtained on the calling thread; caching it in a static field would share
+    // the class-loading thread's generator with every other thread.
+    final Random random = ThreadLocalRandom.current();
+    float[][] geoCoordinates = new float[numberOfPoints][2];
+    for (int i = 0; i < numberOfPoints; i++) {
+      double angle = 2 * Math.PI * random.nextDouble();
+      // sqrt spreads inside points evenly over the disc area; the 1 meter margin keeps them off the boundary
+      double distance = shouldBeInside
+                        ? circleRadius * Math.sqrt(random.nextDouble()) - 1
+                        : circleRadius + 100 * random.nextDouble();
+      // Convert the metric offset to degrees; 1 degree is approximately 111,000 meters
+      double latitude = circleCenterLat + distance * Math.cos(angle) / 111000;
+      double longitude = circleCenterLon + distance * Math.sin(angle) / (111000 * Math.cos(Math.toRadians(latitude)));
+
+      geoCoordinates[i][0] = (float) latitude;
+      geoCoordinates[i][1] = (float) longitude;
+    }
+    return geoCoordinates;
+  }
+}
diff --git a/processing/src/test/java/org/apache/druid/collections/spatial/search/PolygonBoundTest.java b/processing/src/test/java/org/apache/druid/collections/spatial/search/PolygonBoundTest.java
index 3a5d89b73f1..bb3d5862a33 100644
--- a/processing/src/test/java/org/apache/druid/collections/spatial/search/PolygonBoundTest.java
+++ b/processing/src/test/java/org/apache/druid/collections/spatial/search/PolygonBoundTest.java
@@ -79,6 +79,24 @@ public class PolygonBoundTest
     Assert.assertFalse(rightTriangle.contains(new float[]{3f, 3f + delta}));
   }
 
+  @Test
+  public void testHighPrecisions()
+  {
+    //37.82460331205531, -122.50851323395436 Black Sand Beach
+    //37.79378584960722, 
-122.48344917652936 Bakers Beach + //37.82872192254861, -122.48597242173493 Golden Gate view point + + final PolygonBound triangle = PolygonBound.from( + new float[]{37.82460331205531f, 37.79378584960722f, 37.82872192254861f}, + new float[]{-122.50851323395436f, -122.48344917652936f, -122.48597242173493f} + ); + // points near triangle edges + Assert.assertTrue(triangle.contains(new float[]{37.82668550138975f, -122.48783179067323f})); + Assert.assertTrue(triangle.contains(new float[]{37.813408325545275f, -122.48605838780342f})); + Assert.assertFalse(triangle.contains(new float[]{37.80812634358083f, -122.49676991156807f})); + Assert.assertFalse(triangle.contains(new float[]{37.81832968852414f, -122.4843583756818f})); + } + @Test public void testEqualsContract() { diff --git a/processing/src/test/java/org/apache/druid/collections/spatial/search/RadiusBoundTest.java b/processing/src/test/java/org/apache/druid/collections/spatial/search/RadiusBoundTest.java index 53b0a0006e5..23a232de17e 100644 --- a/processing/src/test/java/org/apache/druid/collections/spatial/search/RadiusBoundTest.java +++ b/processing/src/test/java/org/apache/druid/collections/spatial/search/RadiusBoundTest.java @@ -19,6 +19,10 @@ package org.apache.druid.collections.spatial.search; +import com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.druid.collections.spatial.RTreeUtils; +import org.apache.druid.collections.spatial.SpatialUtils; +import org.apache.druid.jackson.DefaultObjectMapper; import org.junit.Assert; import org.junit.Test; @@ -48,4 +52,72 @@ public class RadiusBoundTest new RadiusBound(coords0, 3.0F, 9).getCacheKey() )); } + + @Test + public void testContains() + { + float circleCenterLat = 12.3456789f; + float circleCenterLon = 45.6789012f; + float circleRadius = 500.0f; // Radius in meters + int numberOfPoints = 1000; + + float[] center = new float[]{circleCenterLat, circleCenterLon}; + Bound bound = new RadiusBound(center, circleRadius, 100, 
RadiusBound.RadiusUnit.meters); + + float[][] geoInsidePoints = SpatialUtils.generateGeoCoordinatesAroundCircle( + circleCenterLat, + circleCenterLon, + circleRadius, + numberOfPoints, + true + ); + + for (float[] geoPoint : geoInsidePoints) { + double distance = RTreeUtils.calculateHaversineDistance(geoPoint[0], geoPoint[1], center[0], center[1]); + Assert.assertTrue(distance < circleRadius); + Assert.assertTrue(bound.contains(geoPoint)); + float[] floatPoint = new float[]{ + Float.parseFloat(String.valueOf(geoPoint[0])), + Float.parseFloat(String.valueOf(geoPoint[1])) + }; + Assert.assertTrue(bound.contains(floatPoint)); + } + + float[][] geoOutsidePoints = SpatialUtils.generateGeoCoordinatesAroundCircle( + circleCenterLat, + circleCenterLon, + circleRadius, + numberOfPoints, + false + ); + + for (float[] geoPoint : geoOutsidePoints) { + double haversineDistance = RTreeUtils.calculateHaversineDistance(geoPoint[0], geoPoint[1], center[0], center[1]); + Assert.assertTrue(haversineDistance > circleRadius); // asserts that point is outside + Assert.assertFalse(bound.contains(geoPoint)); + float[] floatPoint = new float[]{ + Float.parseFloat(String.valueOf(geoPoint[0])), + Float.parseFloat(String.valueOf(geoPoint[1])) + }; + Assert.assertFalse(bound.contains(floatPoint)); + } + } + + @Test + public void deSerTest() throws JsonProcessingException + { + float circleCenterLat = 12.3456789f; + float circleCenterLon = 45.6789012f; + float circleRadius = 500.0f; // Radius in meters + + float[] center = new float[]{circleCenterLat, circleCenterLon}; + Bound bound = new RadiusBound(center, circleRadius, 100); + DefaultObjectMapper objectMapper = DefaultObjectMapper.INSTANCE; + Bound val = objectMapper.readValue(objectMapper.writeValueAsString(bound), Bound.class); + Assert.assertEquals(bound, val); + + Bound bound1 = new RadiusBound(center, circleRadius, 100, RadiusBound.RadiusUnit.meters); + Bound val1 = objectMapper.readValue(objectMapper.writeValueAsString(bound1), 
Bound.class); + Assert.assertEquals(bound1, val1); + } } diff --git a/processing/src/test/java/org/apache/druid/collections/spatial/search/RectangularBoundTest.java b/processing/src/test/java/org/apache/druid/collections/spatial/search/RectangularBoundTest.java index c50b70be6ec..0c1418b2811 100644 --- a/processing/src/test/java/org/apache/druid/collections/spatial/search/RectangularBoundTest.java +++ b/processing/src/test/java/org/apache/druid/collections/spatial/search/RectangularBoundTest.java @@ -19,7 +19,9 @@ package org.apache.druid.collections.spatial.search; +import com.fasterxml.jackson.core.JsonProcessingException; import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.jackson.DefaultObjectMapper; import org.junit.Assert; import org.junit.Test; @@ -48,6 +50,47 @@ public class RectangularBoundTest )); } + @Test + public void testRectangularBound() + { + float[][] insidePoints = new float[][]{ + {37.795717853074635f, -122.40906979480418f}, + {37.79625791859653f, -122.39638788940042f}, + {37.79685798676811f, -122.39335030726777f}, + {37.7966179600844f, -122.39798262002006f} + }; + float[][] outsidePoints = new float[][]{ + {37.79805810848854f, -122.39236309307468f}, + {37.78197485768925f, -122.41886599718191f}, + {37.798298130492945f, -122.39608413118715f}, + {37.783595343766216f, -122.41932163450181f} + }; + RectangularBound rectangularBound = new RectangularBound( + new float[]{37.78185482027019f, -122.41795472254213f}, + new float[]{37.797638168104185f, -122.39228715352137f}, + 10 + ); + for (float[] insidePoint : insidePoints) { + Assert.assertTrue(rectangularBound.contains(insidePoint)); + } + for (float[] outsidePoint : outsidePoints) { + Assert.assertFalse(rectangularBound.contains(outsidePoint)); + } + } + + @Test + public void testDeSer() throws JsonProcessingException + { + Bound rectangularBound = new RectangularBound( + new float[]{39.094969f, -84.516996f}, + new float[]{39.095473f, -84.515373f} + ); + DefaultObjectMapper 
objectMapper = DefaultObjectMapper.INSTANCE; + String val = objectMapper.writeValueAsString(rectangularBound); + Bound deSerVal = objectMapper.readValue(val, Bound.class); + Assert.assertEquals(deSerVal, rectangularBound); + } + @Test public void testEqualsContract() { diff --git a/website/.spelling b/website/.spelling index 37e43c9d0c0..0eaf3b2f0f3 100644 --- a/website/.spelling +++ b/website/.spelling @@ -2164,6 +2164,9 @@ jackson-jq missingValue skipBytesInMemoryOverheadCheck spatialDimensions +radiusUnit +euclidean +kilometers useFieldDiscovery 4CPU cityName