From 961960f1c2c55810c2e5dbae9dd4d8e7d850fe8a Mon Sep 17 00:00:00 2001 From: David Wayne Smiley Date: Thu, 21 May 2015 13:04:33 +0000 Subject: [PATCH] SOLR-7379: Spatial RptWithGeometrySpatialField (based on CompositeSpatialStrategy) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1680862 13f79535-47bb-0310-9956-ffa450edef68 --- .../composite/CompositeSpatialStrategy.java | 12 + solr/CHANGES.txt | 4 + .../component/SpatialHeatmapFacets.java | 23 +- .../schema/RptWithGeometrySpatialField.java | 249 ++++++++++++++++++ .../solr/collection1/conf/schema-spatial.xml | 3 + .../collection1/conf/solrconfig-spatial.xml | 36 +++ .../apache/solr/search/TestSolr4Spatial2.java | 59 ++++- 7 files changed, 375 insertions(+), 11 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/schema/RptWithGeometrySpatialField.java create mode 100644 solr/core/src/test-files/solr/collection1/conf/solrconfig-spatial.xml diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeSpatialStrategy.java b/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeSpatialStrategy.java index ec25230d7d0..e2ae994df4c 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeSpatialStrategy.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/composite/CompositeSpatialStrategy.java @@ -62,6 +62,18 @@ public class CompositeSpatialStrategy extends SpatialStrategy { this.geometryStrategy = geometryStrategy; } + public RecursivePrefixTreeStrategy getIndexStrategy() { + return indexStrategy; + } + + public SerializedDVStrategy getGeometryStrategy() { + return geometryStrategy; + } + + public boolean isOptimizePredicates() { + return optimizePredicates; + } + /** Set to false to NOT use optimized search predicates that avoid checking the geometry sometimes. Only useful for * benchmarking. */ public void setOptimizePredicates(boolean optimizePredicates) { diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 87f8d7dba60..503668504d3 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -212,6 +212,10 @@ New Features * SOLR-7274: Pluggable authentication module in Solr. This defines an interface and a mechanism to create, load, and use an Authentication plugin. (Noble Paul, Ishan Chattopadhyaya, Gregory Chanan, Anshum Gupta) +* SOLR-7379: (experimental) New spatial RptWithGeometrySpatialField, based on CompositeSpatialStrategy, + which blends RPT indexes for speed with serialized geometry for accuracy. Includes a Lucene segment based + in-memory shape cache. (David Smiley) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java b/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java index decec580a9f..554780578a3 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java @@ -47,10 +47,12 @@ import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.schema.AbstractSpatialPrefixTreeFieldType; import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.RptWithGeometrySpatialField; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SpatialRecursivePrefixTreeFieldType; import org.apache.solr.search.DocSet; import org.apache.solr.search.QueryParsing; +import org.apache.solr.util.DistanceUnits; import org.apache.solr.util.SpatialUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,13 +77,24 @@ public class SpatialHeatmapFacets { //get the strategy from the field type final SchemaField schemaField = rb.req.getSchema().getField(fieldName); final FieldType type = schemaField.getType(); - if (!(type instanceof AbstractSpatialPrefixTreeFieldType)) { + + final PrefixTreeStrategy strategy; + final DistanceUnits distanceUnits; + // note: the two instanceof conditions is not ideal, versus one. If we start needing to add more then refactor. + if ((type instanceof AbstractSpatialPrefixTreeFieldType)) { + AbstractSpatialPrefixTreeFieldType rptType = (AbstractSpatialPrefixTreeFieldType) type; + strategy = (PrefixTreeStrategy) rptType.getStrategy(fieldName); + distanceUnits = rptType.getDistanceUnits(); + } else if (type instanceof RptWithGeometrySpatialField) { + RptWithGeometrySpatialField rptSdvType = (RptWithGeometrySpatialField) type; + strategy = rptSdvType.getStrategy(fieldName).getIndexStrategy(); + distanceUnits = rptSdvType.getDistanceUnits(); + } else { //FYI we support the term query one too but few people use that one throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "heatmap field needs to be of type " - + SpatialRecursivePrefixTreeFieldType.class); + + SpatialRecursivePrefixTreeFieldType.class + " or " + RptWithGeometrySpatialField.class); } - AbstractSpatialPrefixTreeFieldType rptType = (AbstractSpatialPrefixTreeFieldType) type; - final PrefixTreeStrategy strategy = (PrefixTreeStrategy) rptType.getStrategy(fieldName); + final SpatialContext ctx = strategy.getSpatialContext(); //get the bbox (query Rectangle) @@ -106,7 +119,7 @@ public class SpatialHeatmapFacets { final Double distErrObj = params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR); if (distErrObj != null) { // convert distErr units based on configured units - spatialArgs.setDistErr(distErrObj * rptType.getDistanceUnits().multiplierFromThisUnitToDegrees()); + spatialArgs.setDistErr(distErrObj * distanceUnits.multiplierFromThisUnitToDegrees()); } spatialArgs.setDistErrPct(params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR_PCT)); double distErr = spatialArgs.resolveDistErr(ctx, DEFAULT_DIST_ERR_PCT); diff --git a/solr/core/src/java/org/apache/solr/schema/RptWithGeometrySpatialField.java b/solr/core/src/java/org/apache/solr/schema/RptWithGeometrySpatialField.java new file mode 100644 index 00000000000..30f565ef67e --- /dev/null +++ b/solr/core/src/java/org/apache/solr/schema/RptWithGeometrySpatialField.java @@ -0,0 +1,249 @@ +package org.apache.solr.schema; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.lang.ref.WeakReference; +import java.util.Map; + +import com.spatial4j.core.context.SpatialContext; +import com.spatial4j.core.shape.Shape; +import com.spatial4j.core.shape.jts.JtsGeometry; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.spatial.composite.CompositeSpatialStrategy; +import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; +import org.apache.lucene.spatial.query.SpatialArgsParser; +import org.apache.lucene.spatial.serialized.SerializedDVStrategy; +import org.apache.solr.common.SolrException; +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrRequestInfo; +import org.apache.solr.search.SolrCache; + +/** A Solr Spatial FieldType based on {@link CompositeSpatialStrategy}. + * @lucene.experimental */ +public class RptWithGeometrySpatialField extends AbstractSpatialFieldType { + + public static final String DEFAULT_DIST_ERR_PCT = "0.15"; + + private SpatialRecursivePrefixTreeFieldType rptFieldType; + private SolrCore core; + + @Override + protected void init(IndexSchema schema, Map args) { + // Do NOT call super.init(); instead we delegate to an RPT field. Admittedly this is error prone. + + //TODO Move this check to a call from AbstractSpatialFieldType.createFields() so the type can declare + // if it supports multi-valued or not. It's insufficient here; we can't see if you set multiValued on the field. + if (isMultiValued()) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Not capable of multiValued: " + getTypeName()); + } + + // Choose a better default distErrPct if not configured + if (args.containsKey(SpatialArgsParser.DIST_ERR_PCT) == false) { + args.put(SpatialArgsParser.DIST_ERR_PCT, DEFAULT_DIST_ERR_PCT); + } + + rptFieldType = new SpatialRecursivePrefixTreeFieldType(); + rptFieldType.setTypeName(getTypeName()); + rptFieldType.properties = properties; + rptFieldType.init(schema, args); + rptFieldType.argsParser = argsParser = newSpatialArgsParser(); + this.ctx = rptFieldType.ctx; + this.distanceUnits = rptFieldType.distanceUnits; + this.units = rptFieldType.units; + } + + @Override + protected CompositeSpatialStrategy newSpatialStrategy(String fieldName) { + // We use the same field name for both sub-strategies knowing there will be no conflict for these two + + RecursivePrefixTreeStrategy rptStrategy = rptFieldType.newSpatialStrategy(fieldName); + + SerializedDVStrategy geomStrategy = new CachingSerializedDVStrategy(ctx, fieldName); + + return new CompositeSpatialStrategy(fieldName, rptStrategy, geomStrategy); + } + + @Override + public Analyzer getQueryAnalyzer() { + return rptFieldType.getQueryAnalyzer(); + } + + @Override + public Analyzer getIndexAnalyzer() { + return rptFieldType.getIndexAnalyzer(); + } + + // Most of the complexity of this field type is below, which is all about caching the shapes in a SolrCache + + private static class CachingSerializedDVStrategy extends SerializedDVStrategy { + public CachingSerializedDVStrategy(SpatialContext ctx, String fieldName) { + super(ctx, fieldName); + } + + @Override + public ValueSource makeShapeValueSource() { + return new CachingShapeValuesource(super.makeShapeValueSource(), getFieldName()); + } + } + + private static class CachingShapeValuesource extends ValueSource { + + private final ValueSource targetValueSource; + private final String fieldName; + + private CachingShapeValuesource(ValueSource targetValueSource, String fieldName) { + this.targetValueSource = targetValueSource; + this.fieldName = fieldName; + } + + @Override + public String description() { + return "cache(" + targetValueSource.description() + ")"; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + CachingShapeValuesource that = (CachingShapeValuesource) o; + + if (!targetValueSource.equals(that.targetValueSource)) return false; + return fieldName.equals(that.fieldName); + + } + + @Override + public int hashCode() { + int result = targetValueSource.hashCode(); + result = 31 * result + fieldName.hashCode(); + return result; + } + + @SuppressWarnings("unchecked") + @Override + public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException { + final FunctionValues targetFuncValues = targetValueSource.getValues(context, readerContext); + // The key is a pair of leaf reader with a docId relative to that reader. The value is a Map from field to Shape. + final SolrCache cache = + SolrRequestInfo.getRequestInfo().getReq().getSearcher().getCache(CACHE_KEY_PREFIX + fieldName); + if (cache == null) { + return targetFuncValues; // no caching; no configured cache + } + + return new FunctionValues() { + int docId = -1; + Shape shape = null; + + private void setShapeFromDoc(int doc) { + if (docId == doc) { + return; + } + docId = doc; + //lookup in cache + PerSegCacheKey key = new PerSegCacheKey(readerContext.reader().getCoreCacheKey(), doc); + shape = cache.get(key); + if (shape == null) { + shape = (Shape) targetFuncValues.objectVal(doc); + if (shape != null) { + cache.put(key, shape); + } + } else { + //optimize shape on a cache hit if possible. This must be thread-safe and it is. + if (shape instanceof JtsGeometry) { + ((JtsGeometry) shape).index(); // TODO would be nice if some day we didn't have to cast + } + } + } + + // Use the cache for exists & objectVal; + + @Override + public boolean exists(int doc) { + setShapeFromDoc(doc); + return shape != null; + } + + @Override + public Object objectVal(int doc) { + setShapeFromDoc(doc); + return shape; + } + + @Override + public Explanation explain(int doc) { + return targetFuncValues.explain(doc); + } + + @Override + public String toString(int doc) { + return targetFuncValues.toString(doc); + } + }; + + } + + } + + public static final String CACHE_KEY_PREFIX = "perSegSpatialFieldCache_";//then field name + + // Used in a SolrCache for the key + private static class PerSegCacheKey { + final WeakReference segCoreKeyRef; + final int docId; + final int hashCode;//cached because we can't necessarily compute after construction + + private PerSegCacheKey(Object segCoreKey, int docId) { + this.segCoreKeyRef = new WeakReference<>(segCoreKey); + this.docId = docId; + this.hashCode = segCoreKey.hashCode() * 31 + docId; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + PerSegCacheKey that = (PerSegCacheKey) o; + + if (docId != that.docId) return false; + + //compare by referent not reference + Object segCoreKey = segCoreKeyRef.get(); + if (segCoreKey == null) { + return false; + } + return segCoreKey.equals(that.segCoreKeyRef.get()); + } + + @Override + public int hashCode() { + return hashCode; + } + + @Override + public String toString() { + return "Key{seg=" + segCoreKeyRef.get() + ", docId=" + docId + '}'; + } + } +} diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml b/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml index 3635dc182d9..2c1ca1f943b 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-spatial.xml @@ -50,6 +50,8 @@ + + @@ -64,6 +66,7 @@ + diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-spatial.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-spatial.xml new file mode 100644 index 00000000000..8fc74a2b613 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-spatial.xml @@ -0,0 +1,36 @@ + + + + + + + ${tests.luceneMatchVersion:LATEST} + ${solr.data.dir:} + + + + + + + + \ No newline at end of file diff --git a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java index 1dc7d6c472b..c00bb6e2d30 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java +++ b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial2.java @@ -19,6 +19,8 @@ package org.apache.solr.search; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.request.SolrQueryRequest; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -28,7 +30,7 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { - initCore("solrconfig-basic.xml", "schema-spatial.xml"); + initCore("solrconfig-spatial.xml", "schema-spatial.xml"); } @Override @@ -72,19 +74,19 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 { ); //area2D - assertJQ(req("q", "{!field f="+fieldName+" filter=false score=area2D}" + + assertJQ(req("q", "{!field f=" + fieldName + " filter=false score=area2D}" + "Intersects(ENVELOPE(0,0,12,12))",//pt "fl", "id,score", "debug", "results"),//explain info - "/response/docs/[0]/id=='1'" , + "/response/docs/[0]/id=='1'", "/response/docs/[0]/score==" + (30f * 5f) + "]"//150 ); //area (not 2D) - assertJQ(req("q", "{!field f="+fieldName+" filter=false score=area}" + + assertJQ(req("q", "{!field f=" + fieldName + " filter=false score=area}" + "Intersects(ENVELOPE(0,0,12,12))",//pt "fl", "id,score", "debug", "results"),//explain info - "/response/docs/[0]/id=='1'" , + "/response/docs/[0]/id=='1'", "/response/docs/[0]/score==" + 146.39793f + "]"//a bit less than 150 ); } @@ -94,8 +96,53 @@ public class TestSolr4Spatial2 extends SolrTestCaseJ4 { String fieldName = "bbox"; assertQEx("expect friendly error message", "area2D", - req("{!field f="+fieldName+" filter=false score=bogus}Intersects(ENVELOPE(0,0,12,12))"), + req("{!field f=" + fieldName + " filter=false score=bogus}Intersects(ENVELOPE(0,0,12,12))"), SolrException.ErrorCode.BAD_REQUEST); } + @Test + public void testRptWithGeometryField() throws Exception { + String fieldName = "srptgeom"; //note: fails with "srpt_geohash" because it's not as precise + assertU(adoc("id", "0", fieldName, "ENVELOPE(-10, 20, 15, 10)")); + assertU(adoc("id", "1", fieldName, "BUFFER(POINT(-10 15), 5)"));//circle at top-left corner + assertU(optimize());// one segment. + assertU(commit()); + + // Search to the edge but not quite touching the indexed envelope of id=0. It requires geom validation to + // eliminate id=0. id=1 is found and doesn't require validation. cache=false means no query cache. + final SolrQueryRequest sameReq = req( + "q", "{!cache=false field f=" + fieldName + "}Intersects(ENVELOPE(-20, -10.0001, 30, 15.0001))", + "sort", "id asc"); + assertJQ(sameReq, "/response/numFound==1", "/response/docs/[0]/id=='1'"); + + // The tricky thing is verifying the cache works correctly... + + SolrCache cache = (SolrCache) h.getCore().getInfoRegistry().get("perSegSpatialFieldCache_srptgeom"); + assertEquals("1", cache.getStatistics().get("cumulative_inserts").toString()); + assertEquals("0", cache.getStatistics().get("cumulative_hits").toString()); + + // Repeat the query earlier + assertJQ(sameReq, "/response/numFound==1", "/response/docs/[0]/id=='1'"); + assertEquals("1", cache.getStatistics().get("cumulative_hits").toString()); + + assertEquals("1 segment", + 1, ((SolrIndexSearcher) h.getCore().getInfoRegistry().get("searcher")).getRawReader().leaves().size()); + // add new segment + assertU(adoc("id", "3")); + assertU(commit()); // sometimes merges (to one seg), sometimes won't + boolean newSeg = + (((SolrIndexSearcher)h.getCore().getInfoRegistry().get("searcher")).getRawReader().leaves().size() > 1); + + // can still find the same document + assertJQ(sameReq, "/response/numFound==1", "/response/docs/[0]/id=='1'"); + + // when there are new segments, we accumulate another hit. This tests the cache was not blown away on commit. + assertEquals(newSeg ? "2" : "1", cache.getStatistics().get("cumulative_hits").toString()); + + // Now try to see if heatmaps work: + assertJQ(req("q", "*:*", "facet", "true", FacetParams.FACET_HEATMAP, fieldName, "json.nl", "map"), + "/facet_counts/facet_heatmaps/" + fieldName + "/minX==-180.0"); + + } + }