SOLR-11382: Solr support for Lucene spatial Geo3D.

This commit is contained in:
David Smiley 2017-09-23 11:03:39 -04:00
parent 347ab7e86f
commit d6fa057150
7 changed files with 101 additions and 16 deletions

View File

@ -95,6 +95,10 @@ New Features
* SOLR-10962: Make ReplicationHandler's commitReserveDuration configurable in SolrCloud mode. * SOLR-10962: Make ReplicationHandler's commitReserveDuration configurable in SolrCloud mode.
(Ramsey Haddad, Christine Poerschke, hossman) (Ramsey Haddad, Christine Poerschke, hossman)
* SOLR-11382: Lucene's Geo3D (surface of sphere & ellipsoid) is now supported on spatial RPT fields by
setting spatialContextFactory="Geo3D". Furthermore, this is the first time Solr has out of the box
support for polygons. (David Smiley)
Bug Fixes Bug Fixes
---------------------- ----------------------

View File

@ -46,6 +46,7 @@ import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.query.SpatialArgs; import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialArgsParser; import org.apache.lucene.spatial.query.SpatialArgsParser;
import org.apache.lucene.spatial.query.SpatialOperation; import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.spatial4j.Geo3dSpatialContextFactory;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
@ -131,6 +132,10 @@ public abstract class AbstractSpatialFieldType<T extends SpatialStrategy> extend
argEntry.setValue("org.locationtech.spatial4j.context.jts.JtsSpatialContextFactory"); argEntry.setValue("org.locationtech.spatial4j.context.jts.JtsSpatialContextFactory");
continue; continue;
} }
if (argEntry.getKey().equals(CTX_PARAM) && argEntry.getValue().equals("Geo3D")) {
argEntry.setValue(Geo3dSpatialContextFactory.class.getName());
continue;
}
// Warn about using old Spatial4j class names // Warn about using old Spatial4j class names
if (argEntry.getValue().contains(OLD_SPATIAL4J_PREFIX)) { if (argEntry.getValue().contains(OLD_SPATIAL4J_PREFIX)) {
log.warn("Replace '" + OLD_SPATIAL4J_PREFIX + "' with '" + NEW_SPATIAL4J_PREFIX + "' in your schema."); log.warn("Replace '" + OLD_SPATIAL4J_PREFIX + "' with '" + NEW_SPATIAL4J_PREFIX + "' in your schema.");

View File

@ -56,6 +56,9 @@
<fieldType name="srptgeom" class="solr.RptWithGeometrySpatialField"/> <fieldType name="srptgeom" class="solr.RptWithGeometrySpatialField"/>
<fieldType name="srptgeom_geo3d" class="solr.RptWithGeometrySpatialField"
spatialContextFactory="Geo3D" planetModel="wgs84"/><!-- or sphere -->
<fieldType name="bbox" class="solr.BBoxField" <fieldType name="bbox" class="solr.BBoxField"
numberType="tdoubleDV" distanceUnits="degrees" storeSubFields="false"/> numberType="tdoubleDV" distanceUnits="degrees" storeSubFields="false"/>
@ -75,6 +78,7 @@
<field name="stqpt_geohash" type="stqpt_geohash" multiValued="true"/> <field name="stqpt_geohash" type="stqpt_geohash" multiValued="true"/>
<field name="pointvector" type="pointvector"/> <field name="pointvector" type="pointvector"/>
<field name="srptgeom" type="srptgeom"/> <field name="srptgeom" type="srptgeom"/>
<field name="srptgeom_geo3d" type="srptgeom_geo3d"/>
<field name="bbox" type="bbox"/> <field name="bbox" type="bbox"/>
<field name="pbbox" type="pbbox"/> <field name="pbbox" type="pbbox"/>
<field name="bbox_ndv" type="bbox_ndv"/> <field name="bbox_ndv" type="bbox_ndv"/>

View File

@ -32,5 +32,11 @@
initialSize="0" initialSize="0"
autowarmCount="100%" autowarmCount="100%"
regenerator="solr.NoOpRegenerator"/> regenerator="solr.NoOpRegenerator"/>
<cache name="perSegSpatialFieldCache_srptgeom_geo3d"
class="solr.LRUCache"
size="3"
initialSize="0"
autowarmCount="100%"
regenerator="solr.NoOpRegenerator"/>
</query> </query>
</config> </config>

View File

@ -76,16 +76,16 @@ public class SpatialRPTFieldTypeTest extends AbstractBadConfigTestBase {
assertU(commit()); assertU(commit());
String q; String q;
q = "geo:{!geofilt score=distance filter=false sfield=geo pt="+QUERY_COORDINATES+" d=1000}"; q = "geo:{!geofilt score=distance filter=false sfield=geo pt="+QUERY_COORDINATES+" d=180}";
assertQ(req("q", q, "fl", "*,score"), "//result/doc/float[@name='score'][.='"+DISTANCE_DEGREES+"']"); assertQ(req("q", q, "fl", "*,score"), "//result/doc/float[@name='score'][.='"+DISTANCE_DEGREES+"']");
q = "geo:{!geofilt score=degrees filter=false sfield=geo pt="+QUERY_COORDINATES+" d=1000}"; q = "geo:{!geofilt score=degrees filter=false sfield=geo pt="+QUERY_COORDINATES+" d=180}";
assertQ(req("q", q, "fl", "*,score"), "//result/doc/float[@name='score'][.='"+DISTANCE_DEGREES+"']"); assertQ(req("q", q, "fl", "*,score"), "//result/doc/float[@name='score'][.='"+DISTANCE_DEGREES+"']");
q = "geo:{!geofilt score=kilometers filter=false sfield=geo pt="+QUERY_COORDINATES+" d=1000}"; q = "geo:{!geofilt score=kilometers filter=false sfield=geo pt="+QUERY_COORDINATES+" d=180}";
assertQ(req("q", q, "fl", "*,score"), "//result/doc/float[@name='score'][.='"+DISTANCE_KILOMETERS+"']"); assertQ(req("q", q, "fl", "*,score"), "//result/doc/float[@name='score'][.='"+DISTANCE_KILOMETERS+"']");
q = "geo:{!geofilt score=miles filter=false sfield=geo pt="+QUERY_COORDINATES+" d=1000}"; q = "geo:{!geofilt score=miles filter=false sfield=geo pt="+QUERY_COORDINATES+" d=180}";
assertQ(req("q", q, "fl", "*,score"), "//result/doc/float[@name='score'][.='"+DISTANCE_MILES+"']"); assertQ(req("q", q, "fl", "*,score"), "//result/doc/float[@name='score'][.='"+DISTANCE_MILES+"']");
} }
@ -264,6 +264,10 @@ public class SpatialRPTFieldTypeTest extends AbstractBadConfigTestBase {
if(format!=null) { if(format!=null) {
rptMap.put("format", format); rptMap.put("format", format);
} }
if (random().nextBoolean()) {
// use Geo3D sometimes
rptMap.put("spatialContextFactory", "Geo3D");
}
fieldType.init(oldSchema, rptMap); fieldType.init(oldSchema, rptMap);
fieldType.setTypeName("location_rpt"); fieldType.setTypeName("location_rpt");
SchemaField newField = new SchemaField("geo", fieldType, SchemaField.STORED | SchemaField.INDEXED, null); SchemaField newField = new SchemaField("geo", fieldType, SchemaField.STORED | SchemaField.INDEXED, null);

View File

@ -103,7 +103,22 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
@Test @Test
public void testRptWithGeometryField() throws Exception { public void testRptWithGeometryField() throws Exception {
String fieldName = "srptgeom"; //note: fails with "srpt_geohash" because it's not as precise testRptWithGeometryField("srptgeom");//note: fails with "srpt_geohash" because it's not as precise
}
/**
 * Runs the shared RPT-with-geometry checks against the Geo3D-backed field and then
 * issues an Intersects query with a polygon, expecting both indexed documents to match.
 */
@Test
public void testRptWithGeometryGeo3dField() throws Exception {
  final String geo3dField = "srptgeom_geo3d";
  // Reuse the common scenario first; it indexes the two documents queried below.
  testRptWithGeometryField(geo3dField);

  // show off that Geo3D supports polygons
  final String triangleWkt = "POLYGON((-11 12, 10.5 12, -11 11, -11 12))"; // right-angle triangle
  final String polygonQuery =
      "{!cache=false field f=" + geo3dField + "}Intersects(" + triangleWkt + ")";
  assertJQ(req("q", polygonQuery, "sort", "id asc"), "/response/numFound==2");
}
private void testRptWithGeometryField(String fieldName) throws Exception {
assertU(adoc("id", "0", fieldName, "ENVELOPE(-10, 20, 15, 10)")); assertU(adoc("id", "0", fieldName, "ENVELOPE(-10, 20, 15, 10)"));
assertU(adoc("id", "1", fieldName, "BUFFER(POINT(-10 15), 5)"));//circle at top-left corner assertU(adoc("id", "1", fieldName, "BUFFER(POINT(-10 15), 5)"));//circle at top-left corner
assertU(optimize());// one segment. assertU(optimize());// one segment.
@ -118,7 +133,7 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
// The tricky thing is verifying the cache works correctly... // The tricky thing is verifying the cache works correctly...
MetricsMap cacheMetrics = (MetricsMap) h.getCore().getCoreMetricManager().getRegistry().getMetrics().get("CACHE.searcher.perSegSpatialFieldCache_srptgeom"); MetricsMap cacheMetrics = (MetricsMap) h.getCore().getCoreMetricManager().getRegistry().getMetrics().get("CACHE.searcher.perSegSpatialFieldCache_" + fieldName);
assertEquals("1", cacheMetrics.getValue().get("cumulative_inserts").toString()); assertEquals("1", cacheMetrics.getValue().get("cumulative_inserts").toString());
assertEquals("0", cacheMetrics.getValue().get("cumulative_hits").toString()); assertEquals("0", cacheMetrics.getValue().get("cumulative_hits").toString());
@ -140,7 +155,7 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 {
assertJQ(sameReq, "/response/numFound==1", "/response/docs/[0]/id=='1'"); assertJQ(sameReq, "/response/numFound==1", "/response/docs/[0]/id=='1'");
// When there are new segments, we accumulate another hit. This tests the cache was not blown away on commit. // When there are new segments, we accumulate another hit. This tests the cache was not blown away on commit.
// Checking equality for the first reader's cache key indicates wether the cache should still be valid. // Checking equality for the first reader's cache key indicates whether the cache should still be valid.
Object leafKey2 = getFirstLeafReaderKey(); Object leafKey2 = getFirstLeafReaderKey();
assertEquals(leafKey1.equals(leafKey2) ? "2" : "1", cacheMetrics.getValue().get("cumulative_hits").toString()); assertEquals(leafKey1.equals(leafKey2) ? "2" : "1", cacheMetrics.getValue().get("cumulative_hits").toString());

View File

@ -187,9 +187,11 @@ Using the <<the-dismax-query-parser.adoc#the-dismax-query-parser,DisMax>> or <<t
== RPT == RPT
RPT refers to either `SpatialRecursivePrefixTreeFieldType` (aka simply RPT) or an extended version: `RptWithGeometrySpatialField` (aka RPT with Geometry). RPT offers several functional improvements over LatLonPointSpatialField: RPT refers to either `SpatialRecursivePrefixTreeFieldType` (aka simply RPT) or an extended version:
`RptWithGeometrySpatialField` (aka RPT with Geometry).
RPT offers several functional improvements over LatLonPointSpatialField:
* Non-geodetic geo=false general x & y (_not_ latitude and longitude) * Non-geodetic geo=false general x & y (_not_ latitude and longitude) -- if desired
* Query by polygons and other complex shapes, in addition to circles & rectangles * Query by polygons and other complex shapes, in addition to circles & rectangles
* Ability to index non-point shapes (e.g. polygons) as well as points -- see RptWithGeometrySpatialField * Ability to index non-point shapes (e.g. polygons) as well as points -- see RptWithGeometrySpatialField
* Heatmap grid faceting * Heatmap grid faceting
@ -198,8 +200,16 @@ RPT _shares_ various features in common with `LatLonPointSpatialField`. Some are
* Latitude/Longitude indexed point data; possibly multi-valued * Latitude/Longitude indexed point data; possibly multi-valued
* Fast filtering with `geofilt`, `bbox` filters, and range query syntax (dateline crossing is supported) * Fast filtering with `geofilt`, `bbox` filters, and range query syntax (dateline crossing is supported)
* Sort/boost via `geodist` * Well-Known-Text (WKT) shape syntax (required for specifying polygons & other complex shapes), and GeoJSON too.
* Well-Known-Text (WKT) shape syntax (required for specifying polygons & other complex shapes), and GeoJSON too. In addition to indexing and searching, this works with the `wt=geojson` (GeoJSON Solr response-writer) and `[geo f=myfield]` (geo Solr document-transformer). In addition to indexing and searching, this works with the `wt=geojson` (GeoJSON Solr response-writer) and `[geo f=myfield]` (geo Solr document-transformer).
* Sort/boost via `geodist` -- _although not recommended_
[TIP]
====
*Important*: Although RPT supports distance sorting/boosting, it is so inefficient at doing this that it might be
removed in the future. Fortunately, you can use LatLonPointSpatialField _as well_ as RPT. Use LLPSF for the distance
sorting/boosting; it only needs to have docValues for this; the index attribute can be disabled as it won't be used.
====
=== Schema Configuration for RPT === Schema Configuration for RPT
@ -251,18 +261,36 @@ A third choice is `packedQuad`, which is generally more efficient than `quad`, p
*_And there are others:_* `normWrapLongitude`, `datelineRule`, `validationRule`, `autoIndex`, `allowMultiOverlap`, `precisionModel`. For further info, see notes below about `spatialContextFactory` implementations referenced above, especially the link to the JTS based one. *_And there are others:_* `normWrapLongitude`, `datelineRule`, `validationRule`, `autoIndex`, `allowMultiOverlap`, `precisionModel`. For further info, see notes below about `spatialContextFactory` implementations referenced above, especially the link to the JTS based one.
=== JTS and Polygons === Standard Shapes
As indicated above, `spatialContextFactory` must be set to `JTS` for polygon support, including multi-polygon. The RPT field types support a set of standard shapes:
points, circles (aka buffered points), envelopes (aka rectangles or bounding boxes), line strings,
polygons, and "multi" variants of these. The envelopes and line strings are Euclidean/cartesian (flat 2D) shapes.
Underlying Solr is the Spatial4j library which implements them. To support other shapes, you can configure the
`spatialContextFactory` attribute on the field type to reference other options. Two are available: JTS and Geo3D.
All other shapes, including even line-strings, are supported without JTS. JTS stands for http://sourceforge.net/projects/jts-topo-suite/[JTS Topology Suite], which does not come with Solr due to its LGPL license. You must download it (a JAR file) and put that in a special location internal to Solr: `SOLR_INSTALL/server/solr-webapp/webapp/WEB-INF/lib/`. You can readily download it here: https://repo1.maven.org/maven2/com/vividsolutions/jts-core/. It will not work if placed in other more typical Solr lib directories, unfortunately. === JTS and Polygons (flat)
When activated, there are additional configuration attributes available; see https://locationtech.github.io/spatial4j/apidocs/org/locationtech/spatial4j/context/jts/JtsSpatialContextFactory.html[org.locationtech.spatial4j.context.jts.JtsSpatialContextFactory] for the Javadocs, and remember to look at the superclass's options in as well. One option in particular you should most likely enable is `autoIndex` (i.e., use JTS's PreparedGeometry) as it's been shown to be a major performance boost for non-trivial polygons. The https://github.com/locationtech/jts[JTS Topology Suite] is a popular computational geometry library with a Euclidean/cartesian (flat 2D) model.
It supports a variety of shapes including polygons, buffering shapes, and some invalid polygon repair fall-backs.
With the help of Spatial4j, included with Solr, the polygons support dateline (anti-meridian) crossing.
Unfortunately Solr cannot include JTS due to its LGPL license.
You must download it (a JAR file) and put that in a special location internal to Solr: `SOLR_INSTALL/server/solr-webapp/webapp/WEB-INF/lib/`.
You can readily download it here: https://repo1.maven.org/maven2/com/vividsolutions/jts-core/.
_It will not work if placed in other more typical Solr lib directories, unfortunately._
JTS's license is expected to be transitioned to BSD by the end of 2017.
Set the `spatialContextFactory` attribute on the field type to `JTS`.
When activated, there are additional configuration attributes available; see
https://locationtech.github.io/spatial4j/apidocs/org/locationtech/spatial4j/context/jts/JtsSpatialContextFactory.html[org.locationtech.spatial4j.context.jts.JtsSpatialContextFactory]
for the Javadocs, and remember to look at the superclass's options as well.
One option in particular you should most likely enable is `autoIndex` (i.e., use JTS's PreparedGeometry) as it's been shown to be a major performance boost for non-trivial polygons.
[source,xml] [source,xml]
---- ----
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType" <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
spatialContextFactory="org.locationtech.spatial4j.context.jts.JtsSpatialContextFactory" spatialContextFactory="JTS"
autoIndex="true" autoIndex="true"
validationRule="repairBuffer0" validationRule="repairBuffer0"
distErrPct="0.025" distErrPct="0.025"
@ -281,6 +309,25 @@ Inside the parenthesis following the search predicate is the shape definition. T
Beyond this Reference Guide and Spatial4j's docs, there are some details that remain at the Solr Wiki at http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4. Beyond this Reference Guide and Spatial4j's docs, there are some details that remain at the Solr Wiki at http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4.
=== Geo3D and Polygons (on the ellipsoid)
Geo3D is the colloquial name of the Lucene spatial-3d module, included with Solr.
It's a computational geometry library implementing a variety of shapes (including polygons) on a sphere or WGS84 ellipsoid.
Geo3D is particularly suited for spatial applications where the geometries cover large distances across the globe.
Geo3D is named as such due to its internal implementation that uses geocentric coordinates (X,Y,Z),
*not* for 3-dimensional geometry, which it does not support.
Despite these internal details, you still supply latitude and longitude as you would normally in Solr.
Set the `spatialContextFactory` attribute on the field type to `Geo3D`.
[source,xml]
----
<fieldType name="geom" class="solr.SpatialRecursivePrefixTreeFieldType"
spatialContextFactory="Geo3D" planetModel="WGS84"/><!-- or "sphere" -->
----
Once the field type has been defined, define a field that uses it.
=== RptWithGeometrySpatialField === RptWithGeometrySpatialField
The `RptWithGeometrySpatialField` field type is a derivative of `SpatialRecursivePrefixTreeFieldType` that also stores the original geometry internally in Lucene DocValues, which it uses to achieve accurate search. It can also be used for indexed point fields. The Intersects predicate (the default) is particularly fast, since many search results can be returned as an accurate hit without requiring a geometry check. This field type is configured just like RPT except that the default `distErrPct` is 0.15 (higher than 0.025) because the grid squares are purely for performance and not to fundamentally represent the shape. The `RptWithGeometrySpatialField` field type is a derivative of `SpatialRecursivePrefixTreeFieldType` that also stores the original geometry internally in Lucene DocValues, which it uses to achieve accurate search. It can also be used for indexed point fields. The Intersects predicate (the default) is particularly fast, since many search results can be returned as an accurate hit without requiring a geometry check. This field type is configured just like RPT except that the default `distErrPct` is 0.15 (higher than 0.025) because the grid squares are purely for performance and not to fundamentally represent the shape.